author    | Bernhard Reutner-Fischer <rep.dot.nop@gmail.com> | 2007-07-23 10:29:34 +0000
committer | Bernhard Reutner-Fischer <rep.dot.nop@gmail.com> | 2007-07-23 10:29:34 +0000
commit    | b384588ad03cafafcf8f93051751c7129e6ece1b (patch)
tree      | 475fe2da628f28d89516613741160f13b12f0842 /toolchain
parent    | 0ad937e9151afd4415913fed4f1c14d15a2f46b2 (diff)
- update ipmisensors
- add lzma vmlinuz
Diffstat (limited to 'toolchain')
3 files changed, 28931 insertions, 4189 deletions
diff --git a/toolchain/kernel-headers/linux-2.6.20.4-ipmisensors-20070314-1214.patch b/toolchain/kernel-headers/linux-2.6.20.4-ipmisensors-20070314-1214.patch
index 506fcb4c7..aca57c37b 100644
--- a/toolchain/kernel-headers/linux-2.6.20.4-ipmisensors-20070314-1214.patch
+++ b/toolchain/kernel-headers/linux-2.6.20.4-ipmisensors-20070314-1214.patch
@@ -31,4195 +31,6 @@ diff -rduNp linux-2.6.20.3.orig/drivers/char/ipmi/ipmi_msghandler.c linux-2.6.20
 EXPORT_SYMBOL(ipmi_user_set_run_to_completion);
 EXPORT_SYMBOL(ipmi_free_recv_msg);
 +EXPORT_SYMBOL(ipmi_get_bmcdevice);
-diff -rduNp linux-2.6.20.3.orig/drivers/char/ipmi/ipmi_msghandler.c.orig linux-2.6.20.3/drivers/char/ipmi/ipmi_msghandler.c.orig
---- linux-2.6.20.3.orig/drivers/char/ipmi/ipmi_msghandler.c.orig 1970-01-01 01:00:00.000000000 +0100
-+++ linux-2.6.20.3/drivers/char/ipmi/ipmi_msghandler.c.orig 2007-03-14 14:22:33.000000000 +0100
-@@ -0,0 +1,4185 @@
[... 4185 removed lines (the embedded copy of drivers/char/ipmi/ipmi_msghandler.c.orig) elided from this view ...]
-+ return 0; -+ -+out_aux_firm: -+ if (bmc->id.aux_firmware_revision_set) -+ device_remove_file(&bmc->dev->dev, -+ &bmc->aux_firmware_rev_attr); -+out_prod_id: -+ device_remove_file(&bmc->dev->dev, -+ &bmc->product_id_attr); -+out_manu: -+ device_remove_file(&bmc->dev->dev, -+ &bmc->manufacturer_id_attr); -+out_add_dev: -+ device_remove_file(&bmc->dev->dev, -+ &bmc->add_dev_support_attr); -+out_version: -+ device_remove_file(&bmc->dev->dev, -+ &bmc->version_attr); -+out_firm: -+ device_remove_file(&bmc->dev->dev, -+ &bmc->firmware_rev_attr); -+out_rev: -+ device_remove_file(&bmc->dev->dev, -+ &bmc->revision_attr); -+out_sdrs: -+ device_remove_file(&bmc->dev->dev, -+ &bmc->provides_dev_sdrs_attr); -+out_devid: -+ device_remove_file(&bmc->dev->dev, -+ &bmc->device_id_attr); -+out: -+ return err; -+} -+ -+static int ipmi_bmc_register(ipmi_smi_t intf, int ifnum, -+ const char *sysfs_name) -+{ -+ int rv; -+ struct bmc_device *bmc = intf->bmc; -+ struct bmc_device *old_bmc; -+ int size; -+ char dummy[1]; -+ -+ mutex_lock(&ipmidriver_mutex); -+ -+ /* -+ * Try to find if there is an bmc_device struct -+ * representing the interfaced BMC already -+ */ -+ if (bmc->guid_set) -+ old_bmc = ipmi_find_bmc_guid(&ipmidriver, bmc->guid); -+ else -+ old_bmc = ipmi_find_bmc_prod_dev_id(&ipmidriver, -+ bmc->id.product_id, -+ bmc->id.device_id); -+ -+ /* -+ * If there is already an bmc_device, free the new one, -+ * otherwise register the new BMC device -+ */ -+ if (old_bmc) { -+ kfree(bmc); -+ intf->bmc = old_bmc; -+ bmc = old_bmc; -+ -+ kref_get(&bmc->refcount); -+ mutex_unlock(&ipmidriver_mutex); -+ -+ printk(KERN_INFO -+ "ipmi: interfacing existing BMC (man_id: 0x%6.6x," -+ " prod_id: 0x%4.4x, dev_id: 0x%2.2x)\n", -+ bmc->id.manufacturer_id, -+ bmc->id.product_id, -+ bmc->id.device_id); -+ } else { -+ char name[14]; -+ unsigned char orig_dev_id = bmc->id.device_id; -+ int warn_printed = 0; -+ -+ snprintf(name, sizeof(name), -+ "ipmi_bmc.%4.4x", bmc->id.product_id); -+ -+ while (ipmi_find_bmc_prod_dev_id(&ipmidriver, -+ bmc->id.product_id, -+ bmc->id.device_id)) { -+ if (!warn_printed) { -+ printk(KERN_WARNING PFX -+ "This machine has two different BMCs" -+ " with the same product id and device" -+ " id. This is an error in the" -+ " firmware, but incrementing the" -+ " device id to work around the problem." -+ " Prod ID = 0x%x, Dev ID = 0x%x\n", -+ bmc->id.product_id, bmc->id.device_id); -+ warn_printed = 1; -+ } -+ bmc->id.device_id++; /* Wraps at 255 */ -+ if (bmc->id.device_id == orig_dev_id) { -+ printk(KERN_ERR PFX -+ "Out of device ids!\n"); -+ break; -+ } -+ } -+ -+ bmc->dev = platform_device_alloc(name, bmc->id.device_id); -+ if (!bmc->dev) { -+ mutex_unlock(&ipmidriver_mutex); -+ printk(KERN_ERR -+ "ipmi_msghandler:" -+ " Unable to allocate platform device\n"); -+ return -ENOMEM; -+ } -+ bmc->dev->dev.driver = &ipmidriver; -+ dev_set_drvdata(&bmc->dev->dev, bmc); -+ kref_init(&bmc->refcount); -+ -+ rv = platform_device_add(bmc->dev); -+ mutex_unlock(&ipmidriver_mutex); -+ if (rv) { -+ platform_device_put(bmc->dev); -+ bmc->dev = NULL; -+ printk(KERN_ERR -+ "ipmi_msghandler:" -+ " Unable to register bmc device: %d\n", -+ rv); -+ /* Don't go to out_err, you can only do that if -+ the device is registered already. 
*/ -+ return rv; -+ } -+ -+ rv = create_files(bmc); -+ if (rv) { -+ mutex_lock(&ipmidriver_mutex); -+ platform_device_unregister(bmc->dev); -+ mutex_unlock(&ipmidriver_mutex); -+ -+ return rv; -+ } -+ -+ printk(KERN_INFO -+ "ipmi: Found new BMC (man_id: 0x%6.6x, " -+ " prod_id: 0x%4.4x, dev_id: 0x%2.2x)\n", -+ bmc->id.manufacturer_id, -+ bmc->id.product_id, -+ bmc->id.device_id); -+ } -+ -+ /* -+ * create symlink from system interface device to bmc device -+ * and back. -+ */ -+ intf->sysfs_name = kstrdup(sysfs_name, GFP_KERNEL); -+ if (!intf->sysfs_name) { -+ rv = -ENOMEM; -+ printk(KERN_ERR -+ "ipmi_msghandler: allocate link to BMC: %d\n", -+ rv); -+ goto out_err; -+ } -+ -+ rv = sysfs_create_link(&intf->si_dev->kobj, -+ &bmc->dev->dev.kobj, intf->sysfs_name); -+ if (rv) { -+ kfree(intf->sysfs_name); -+ intf->sysfs_name = NULL; -+ printk(KERN_ERR -+ "ipmi_msghandler: Unable to create bmc symlink: %d\n", -+ rv); -+ goto out_err; -+ } -+ -+ size = snprintf(dummy, 0, "ipmi%d", ifnum); -+ intf->my_dev_name = kmalloc(size+1, GFP_KERNEL); -+ if (!intf->my_dev_name) { -+ kfree(intf->sysfs_name); -+ intf->sysfs_name = NULL; -+ rv = -ENOMEM; -+ printk(KERN_ERR -+ "ipmi_msghandler: allocate link from BMC: %d\n", -+ rv); -+ goto out_err; -+ } -+ snprintf(intf->my_dev_name, size+1, "ipmi%d", ifnum); -+ -+ rv = sysfs_create_link(&bmc->dev->dev.kobj, &intf->si_dev->kobj, -+ intf->my_dev_name); -+ if (rv) { -+ kfree(intf->sysfs_name); -+ intf->sysfs_name = NULL; -+ kfree(intf->my_dev_name); -+ intf->my_dev_name = NULL; -+ printk(KERN_ERR -+ "ipmi_msghandler:" -+ " Unable to create symlink to bmc: %d\n", -+ rv); -+ goto out_err; -+ } -+ -+ return 0; -+ -+out_err: -+ ipmi_bmc_unregister(intf); -+ return rv; -+} -+ -+static int -+send_guid_cmd(ipmi_smi_t intf, int chan) -+{ -+ struct kernel_ipmi_msg msg; -+ struct ipmi_system_interface_addr si; -+ -+ si.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; -+ si.channel = IPMI_BMC_CHANNEL; -+ si.lun = 0; -+ -+ msg.netfn = IPMI_NETFN_APP_REQUEST; -+ msg.cmd = IPMI_GET_DEVICE_GUID_CMD; -+ msg.data = NULL; -+ msg.data_len = 0; -+ return i_ipmi_request(NULL, -+ intf, -+ (struct ipmi_addr *) &si, -+ 0, -+ &msg, -+ intf, -+ NULL, -+ NULL, -+ 0, -+ intf->channels[0].address, -+ intf->channels[0].lun, -+ -1, 0); -+} -+ -+static void -+guid_handler(ipmi_smi_t intf, struct ipmi_recv_msg *msg) -+{ -+ if ((msg->addr.addr_type != IPMI_SYSTEM_INTERFACE_ADDR_TYPE) -+ || (msg->msg.netfn != IPMI_NETFN_APP_RESPONSE) -+ || (msg->msg.cmd != IPMI_GET_DEVICE_GUID_CMD)) -+ /* Not for me */ -+ return; -+ -+ if (msg->msg.data[0] != 0) { -+ /* Error from getting the GUID, the BMC doesn't have one. */ -+ intf->bmc->guid_set = 0; -+ goto out; -+ } -+ -+ if (msg->msg.data_len < 17) { -+ intf->bmc->guid_set = 0; -+ printk(KERN_WARNING PFX -+ "guid_handler: The GUID response from the BMC was too" -+ " short, it was %d but should have been 17. Assuming" -+ " GUID is not available.\n", -+ msg->msg.data_len); -+ goto out; -+ } -+ -+ memcpy(intf->bmc->guid, msg->msg.data, 16); -+ intf->bmc->guid_set = 1; -+ out: -+ wake_up(&intf->waitq); -+} -+ -+static void -+get_guid(ipmi_smi_t intf) -+{ -+ int rv; -+ -+ intf->bmc->guid_set = 0x2; -+ intf->null_user_handler = guid_handler; -+ rv = send_guid_cmd(intf, 0); -+ if (rv) -+ /* Send failed, no GUID available. 
*/ -+ intf->bmc->guid_set = 0; -+ wait_event(intf->waitq, intf->bmc->guid_set != 2); -+ intf->null_user_handler = NULL; -+} -+ -+static int -+send_channel_info_cmd(ipmi_smi_t intf, int chan) -+{ -+ struct kernel_ipmi_msg msg; -+ unsigned char data[1]; -+ struct ipmi_system_interface_addr si; -+ -+ si.addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; -+ si.channel = IPMI_BMC_CHANNEL; -+ si.lun = 0; -+ -+ msg.netfn = IPMI_NETFN_APP_REQUEST; -+ msg.cmd = IPMI_GET_CHANNEL_INFO_CMD; -+ msg.data = data; -+ msg.data_len = 1; -+ data[0] = chan; -+ return i_ipmi_request(NULL, -+ intf, -+ (struct ipmi_addr *) &si, -+ 0, -+ &msg, -+ intf, -+ NULL, -+ NULL, -+ 0, -+ intf->channels[0].address, -+ intf->channels[0].lun, -+ -1, 0); -+} -+ -+static void -+channel_handler(ipmi_smi_t intf, struct ipmi_recv_msg *msg) -+{ -+ int rv = 0; -+ int chan; -+ -+ if ((msg->addr.addr_type == IPMI_SYSTEM_INTERFACE_ADDR_TYPE) -+ && (msg->msg.netfn == IPMI_NETFN_APP_RESPONSE) -+ && (msg->msg.cmd == IPMI_GET_CHANNEL_INFO_CMD)) -+ { -+ /* It's the one we want */ -+ if (msg->msg.data[0] != 0) { -+ /* Got an error from the channel, just go on. */ -+ -+ if (msg->msg.data[0] == IPMI_INVALID_COMMAND_ERR) { -+ /* If the MC does not support this -+ command, that is legal. We just -+ assume it has one IPMB at channel -+ zero. */ -+ intf->channels[0].medium -+ = IPMI_CHANNEL_MEDIUM_IPMB; -+ intf->channels[0].protocol -+ = IPMI_CHANNEL_PROTOCOL_IPMB; -+ rv = -ENOSYS; -+ -+ intf->curr_channel = IPMI_MAX_CHANNELS; -+ wake_up(&intf->waitq); -+ goto out; -+ } -+ goto next_channel; -+ } -+ if (msg->msg.data_len < 4) { -+ /* Message not big enough, just go on. */ -+ goto next_channel; -+ } -+ chan = intf->curr_channel; -+ intf->channels[chan].medium = msg->msg.data[2] & 0x7f; -+ intf->channels[chan].protocol = msg->msg.data[3] & 0x1f; -+ -+ next_channel: -+ intf->curr_channel++; -+ if (intf->curr_channel >= IPMI_MAX_CHANNELS) -+ wake_up(&intf->waitq); -+ else -+ rv = send_channel_info_cmd(intf, intf->curr_channel); -+ -+ if (rv) { -+ /* Got an error somehow, just give up. */ -+ intf->curr_channel = IPMI_MAX_CHANNELS; -+ wake_up(&intf->waitq); -+ -+ printk(KERN_WARNING PFX -+ "Error sending channel information: %d\n", -+ rv); -+ } -+ } -+ out: -+ return; -+} -+ -+int ipmi_register_smi(struct ipmi_smi_handlers *handlers, -+ void *send_info, -+ struct ipmi_device_id *device_id, -+ struct device *si_dev, -+ const char *sysfs_name, -+ unsigned char slave_addr) -+{ -+ int i, j; -+ int rv; -+ ipmi_smi_t intf; -+ ipmi_smi_t tintf; -+ struct list_head *link; -+ -+ /* Make sure the driver is actually initialized, this handles -+ problems with initialization order. */ -+ if (!initialized) { -+ rv = ipmi_init_msghandler(); -+ if (rv) -+ return rv; -+ /* The init code doesn't return an error if it was turned -+ off, but it won't initialize. Check that. */ -+ if (!initialized) -+ return -ENODEV; -+ } -+ -+ intf = kmalloc(sizeof(*intf), GFP_KERNEL); -+ if (!intf) -+ return -ENOMEM; -+ memset(intf, 0, sizeof(*intf)); -+ -+ intf->ipmi_version_major = ipmi_version_major(device_id); -+ intf->ipmi_version_minor = ipmi_version_minor(device_id); -+ -+ intf->bmc = kzalloc(sizeof(*intf->bmc), GFP_KERNEL); -+ if (!intf->bmc) { -+ kfree(intf); -+ return -ENOMEM; -+ } -+ intf->intf_num = -1; /* Mark it invalid for now. 
*/ -+ kref_init(&intf->refcount); -+ intf->bmc->id = *device_id; -+ intf->si_dev = si_dev; -+ for (j = 0; j < IPMI_MAX_CHANNELS; j++) { -+ intf->channels[j].address = IPMI_BMC_SLAVE_ADDR; -+ intf->channels[j].lun = 2; -+ } -+ if (slave_addr != 0) -+ intf->channels[0].address = slave_addr; -+ INIT_LIST_HEAD(&intf->users); -+ intf->handlers = handlers; -+ intf->send_info = send_info; -+ spin_lock_init(&intf->seq_lock); -+ for (j = 0; j < IPMI_IPMB_NUM_SEQ; j++) { -+ intf->seq_table[j].inuse = 0; -+ intf->seq_table[j].seqid = 0; -+ } -+ intf->curr_seq = 0; -+#ifdef CONFIG_PROC_FS -+ spin_lock_init(&intf->proc_entry_lock); -+#endif -+ spin_lock_init(&intf->waiting_msgs_lock); -+ INIT_LIST_HEAD(&intf->waiting_msgs); -+ spin_lock_init(&intf->events_lock); -+ INIT_LIST_HEAD(&intf->waiting_events); -+ intf->waiting_events_count = 0; -+ mutex_init(&intf->cmd_rcvrs_mutex); -+ spin_lock_init(&intf->maintenance_mode_lock); -+ INIT_LIST_HEAD(&intf->cmd_rcvrs); -+ init_waitqueue_head(&intf->waitq); -+ -+ spin_lock_init(&intf->counter_lock); -+ intf->proc_dir = NULL; -+ -+ mutex_lock(&smi_watchers_mutex); -+ mutex_lock(&ipmi_interfaces_mutex); -+ /* Look for a hole in the numbers. */ -+ i = 0; -+ link = &ipmi_interfaces; -+ list_for_each_entry_rcu(tintf, &ipmi_interfaces, link) { -+ if (tintf->intf_num != i) { -+ link = &tintf->link; -+ break; -+ } -+ i++; -+ } -+ /* Add the new interface in numeric order. */ -+ if (i == 0) -+ list_add_rcu(&intf->link, &ipmi_interfaces); -+ else -+ list_add_tail_rcu(&intf->link, link); -+ -+ rv = handlers->start_processing(send_info, intf); -+ if (rv) -+ goto out; -+ -+ get_guid(intf); -+ -+ if ((intf->ipmi_version_major > 1) -+ || ((intf->ipmi_version_major == 1) -+ && (intf->ipmi_version_minor >= 5))) -+ { -+ /* Start scanning the channels to see what is -+ available. */ -+ intf->null_user_handler = channel_handler; -+ intf->curr_channel = 0; -+ rv = send_channel_info_cmd(intf, 0); -+ if (rv) -+ goto out; -+ -+ /* Wait for the channel info to be read. */ -+ wait_event(intf->waitq, -+ intf->curr_channel >= IPMI_MAX_CHANNELS); -+ intf->null_user_handler = NULL; -+ } else { -+ /* Assume a single IPMB channel at zero. */ -+ intf->channels[0].medium = IPMI_CHANNEL_MEDIUM_IPMB; -+ intf->channels[0].protocol = IPMI_CHANNEL_PROTOCOL_IPMB; -+ } -+ -+ if (rv == 0) -+ rv = add_proc_entries(intf, i); -+ -+ rv = ipmi_bmc_register(intf, i, sysfs_name); -+ -+ out: -+ if (rv) { -+ if (intf->proc_dir) -+ remove_proc_entries(intf); -+ intf->handlers = NULL; -+ list_del_rcu(&intf->link); -+ mutex_unlock(&ipmi_interfaces_mutex); -+ mutex_unlock(&smi_watchers_mutex); -+ synchronize_rcu(); -+ kref_put(&intf->refcount, intf_free); -+ } else { -+ /* After this point the interface is legal to use. */ -+ intf->intf_num = i; -+ mutex_unlock(&ipmi_interfaces_mutex); -+ call_smi_watchers(i, intf->si_dev); -+ mutex_unlock(&smi_watchers_mutex); -+ } -+ -+ return rv; -+} -+ -+static void cleanup_smi_msgs(ipmi_smi_t intf) -+{ -+ int i; -+ struct seq_table *ent; -+ -+ /* No need for locks, the interface is down. 
*/ -+ for (i = 0; i < IPMI_IPMB_NUM_SEQ; i++) { -+ ent = &(intf->seq_table[i]); -+ if (!ent->inuse) -+ continue; -+ deliver_err_response(ent->recv_msg, IPMI_ERR_UNSPECIFIED); -+ } -+} -+ -+int ipmi_unregister_smi(ipmi_smi_t intf) -+{ -+ struct ipmi_smi_watcher *w; -+ int intf_num = intf->intf_num; -+ -+ ipmi_bmc_unregister(intf); -+ -+ mutex_lock(&smi_watchers_mutex); -+ mutex_lock(&ipmi_interfaces_mutex); -+ intf->intf_num = -1; -+ intf->handlers = NULL; -+ list_del_rcu(&intf->link); -+ mutex_unlock(&ipmi_interfaces_mutex); -+ synchronize_rcu(); -+ -+ cleanup_smi_msgs(intf); -+ -+ remove_proc_entries(intf); -+ -+ /* Call all the watcher interfaces to tell them that -+ an interface is gone. */ -+ list_for_each_entry(w, &smi_watchers, link) -+ w->smi_gone(intf_num); -+ mutex_unlock(&smi_watchers_mutex); -+ -+ kref_put(&intf->refcount, intf_free); -+ return 0; -+} -+ -+static int handle_ipmb_get_msg_rsp(ipmi_smi_t intf, -+ struct ipmi_smi_msg *msg) -+{ -+ struct ipmi_ipmb_addr ipmb_addr; -+ struct ipmi_recv_msg *recv_msg; -+ unsigned long flags; -+ -+ -+ /* This is 11, not 10, because the response must contain a -+ * completion code. */ -+ if (msg->rsp_size < 11) { -+ /* Message not big enough, just ignore it. */ -+ spin_lock_irqsave(&intf->counter_lock, flags); -+ intf->invalid_ipmb_responses++; -+ spin_unlock_irqrestore(&intf->counter_lock, flags); -+ return 0; -+ } -+ -+ if (msg->rsp[2] != 0) { -+ /* An error getting the response, just ignore it. */ -+ return 0; -+ } -+ -+ ipmb_addr.addr_type = IPMI_IPMB_ADDR_TYPE; -+ ipmb_addr.slave_addr = msg->rsp[6]; -+ ipmb_addr.channel = msg->rsp[3] & 0x0f; -+ ipmb_addr.lun = msg->rsp[7] & 3; -+ -+ /* It's a response from a remote entity. Look up the sequence -+ number and handle the response. */ -+ if (intf_find_seq(intf, -+ msg->rsp[7] >> 2, -+ msg->rsp[3] & 0x0f, -+ msg->rsp[8], -+ (msg->rsp[4] >> 2) & (~1), -+ (struct ipmi_addr *) &(ipmb_addr), -+ &recv_msg)) -+ { -+ /* We were unable to find the sequence number, -+ so just nuke the message. */ -+ spin_lock_irqsave(&intf->counter_lock, flags); -+ intf->unhandled_ipmb_responses++; -+ spin_unlock_irqrestore(&intf->counter_lock, flags); -+ return 0; -+ } -+ -+ memcpy(recv_msg->msg_data, -+ &(msg->rsp[9]), -+ msg->rsp_size - 9); -+ /* THe other fields matched, so no need to set them, except -+ for netfn, which needs to be the response that was -+ returned, not the request value. */ -+ recv_msg->msg.netfn = msg->rsp[4] >> 2; -+ recv_msg->msg.data = recv_msg->msg_data; -+ recv_msg->msg.data_len = msg->rsp_size - 10; -+ recv_msg->recv_type = IPMI_RESPONSE_RECV_TYPE; -+ spin_lock_irqsave(&intf->counter_lock, flags); -+ intf->handled_ipmb_responses++; -+ spin_unlock_irqrestore(&intf->counter_lock, flags); -+ deliver_response(recv_msg); -+ -+ return 0; -+} -+ -+static int handle_ipmb_get_msg_cmd(ipmi_smi_t intf, -+ struct ipmi_smi_msg *msg) -+{ -+ struct cmd_rcvr *rcvr; -+ int rv = 0; -+ unsigned char netfn; -+ unsigned char cmd; -+ unsigned char chan; -+ ipmi_user_t user = NULL; -+ struct ipmi_ipmb_addr *ipmb_addr; -+ struct ipmi_recv_msg *recv_msg; -+ unsigned long flags; -+ struct ipmi_smi_handlers *handlers; -+ -+ if (msg->rsp_size < 10) { -+ /* Message not big enough, just ignore it. */ -+ spin_lock_irqsave(&intf->counter_lock, flags); -+ intf->invalid_commands++; -+ spin_unlock_irqrestore(&intf->counter_lock, flags); -+ return 0; -+ } -+ -+ if (msg->rsp[2] != 0) { -+ /* An error getting the response, just ignore it. 
*/ -+ return 0; -+ } -+ -+ netfn = msg->rsp[4] >> 2; -+ cmd = msg->rsp[8]; -+ chan = msg->rsp[3] & 0xf; -+ -+ rcu_read_lock(); -+ rcvr = find_cmd_rcvr(intf, netfn, cmd, chan); -+ if (rcvr) { -+ user = rcvr->user; -+ kref_get(&user->refcount); -+ } else -+ user = NULL; -+ rcu_read_unlock(); -+ -+ if (user == NULL) { -+ /* We didn't find a user, deliver an error response. */ -+ spin_lock_irqsave(&intf->counter_lock, flags); -+ intf->unhandled_commands++; -+ spin_unlock_irqrestore(&intf->counter_lock, flags); -+ -+ msg->data[0] = (IPMI_NETFN_APP_REQUEST << 2); -+ msg->data[1] = IPMI_SEND_MSG_CMD; -+ msg->data[2] = msg->rsp[3]; -+ msg->data[3] = msg->rsp[6]; -+ msg->data[4] = ((netfn + 1) << 2) | (msg->rsp[7] & 0x3); -+ msg->data[5] = ipmb_checksum(&(msg->data[3]), 2); -+ msg->data[6] = intf->channels[msg->rsp[3] & 0xf].address; -+ /* rqseq/lun */ -+ msg->data[7] = (msg->rsp[7] & 0xfc) | (msg->rsp[4] & 0x3); -+ msg->data[8] = msg->rsp[8]; /* cmd */ -+ msg->data[9] = IPMI_INVALID_CMD_COMPLETION_CODE; -+ msg->data[10] = ipmb_checksum(&(msg->data[6]), 4); -+ msg->data_size = 11; -+ -+#ifdef DEBUG_MSGING -+ { -+ int m; -+ printk("Invalid command:"); -+ for (m = 0; m < msg->data_size; m++) -+ printk(" %2.2x", msg->data[m]); -+ printk("\n"); -+ } -+#endif -+ rcu_read_lock(); -+ handlers = intf->handlers; -+ if (handlers) { -+ handlers->sender(intf->send_info, msg, 0); -+ /* We used the message, so return the value -+ that causes it to not be freed or -+ queued. */ -+ rv = -1; -+ } -+ rcu_read_unlock(); -+ } else { -+ /* Deliver the message to the user. */ -+ spin_lock_irqsave(&intf->counter_lock, flags); -+ intf->handled_commands++; -+ spin_unlock_irqrestore(&intf->counter_lock, flags); -+ -+ recv_msg = ipmi_alloc_recv_msg(); -+ if (!recv_msg) { -+ /* We couldn't allocate memory for the -+ message, so requeue it for handling -+ later. */ -+ rv = 1; -+ kref_put(&user->refcount, free_user); -+ } else { -+ /* Extract the source address from the data. */ -+ ipmb_addr = (struct ipmi_ipmb_addr *) &recv_msg->addr; -+ ipmb_addr->addr_type = IPMI_IPMB_ADDR_TYPE; -+ ipmb_addr->slave_addr = msg->rsp[6]; -+ ipmb_addr->lun = msg->rsp[7] & 3; -+ ipmb_addr->channel = msg->rsp[3] & 0xf; -+ -+ /* Extract the rest of the message information -+ from the IPMB header.*/ -+ recv_msg->user = user; -+ recv_msg->recv_type = IPMI_CMD_RECV_TYPE; -+ recv_msg->msgid = msg->rsp[7] >> 2; -+ recv_msg->msg.netfn = msg->rsp[4] >> 2; -+ recv_msg->msg.cmd = msg->rsp[8]; -+ recv_msg->msg.data = recv_msg->msg_data; -+ -+ /* We chop off 10, not 9 bytes because the checksum -+ at the end also needs to be removed. */ -+ recv_msg->msg.data_len = msg->rsp_size - 10; -+ memcpy(recv_msg->msg_data, -+ &(msg->rsp[9]), -+ msg->rsp_size - 10); -+ deliver_response(recv_msg); -+ } -+ } -+ -+ return rv; -+} -+ -+static int handle_lan_get_msg_rsp(ipmi_smi_t intf, -+ struct ipmi_smi_msg *msg) -+{ -+ struct ipmi_lan_addr lan_addr; -+ struct ipmi_recv_msg *recv_msg; -+ unsigned long flags; -+ -+ -+ /* This is 13, not 12, because the response must contain a -+ * completion code. */ -+ if (msg->rsp_size < 13) { -+ /* Message not big enough, just ignore it. */ -+ spin_lock_irqsave(&intf->counter_lock, flags); -+ intf->invalid_lan_responses++; -+ spin_unlock_irqrestore(&intf->counter_lock, flags); -+ return 0; -+ } -+ -+ if (msg->rsp[2] != 0) { -+ /* An error getting the response, just ignore it. 
*/ -+ return 0; -+ } -+ -+ lan_addr.addr_type = IPMI_LAN_ADDR_TYPE; -+ lan_addr.session_handle = msg->rsp[4]; -+ lan_addr.remote_SWID = msg->rsp[8]; -+ lan_addr.local_SWID = msg->rsp[5]; -+ lan_addr.channel = msg->rsp[3] & 0x0f; -+ lan_addr.privilege = msg->rsp[3] >> 4; -+ lan_addr.lun = msg->rsp[9] & 3; -+ -+ /* It's a response from a remote entity. Look up the sequence -+ number and handle the response. */ -+ if (intf_find_seq(intf, -+ msg->rsp[9] >> 2, -+ msg->rsp[3] & 0x0f, -+ msg->rsp[10], -+ (msg->rsp[6] >> 2) & (~1), -+ (struct ipmi_addr *) &(lan_addr), -+ &recv_msg)) -+ { -+ /* We were unable to find the sequence number, -+ so just nuke the message. */ -+ spin_lock_irqsave(&intf->counter_lock, flags); -+ intf->unhandled_lan_responses++; -+ spin_unlock_irqrestore(&intf->counter_lock, flags); -+ return 0; -+ } -+ -+ memcpy(recv_msg->msg_data, -+ &(msg->rsp[11]), -+ msg->rsp_size - 11); -+ /* The other fields matched, so no need to set them, except -+ for netfn, which needs to be the response that was -+ returned, not the request value. */ -+ recv_msg->msg.netfn = msg->rsp[6] >> 2; -+ recv_msg->msg.data = recv_msg->msg_data; -+ recv_msg->msg.data_len = msg->rsp_size - 12; -+ recv_msg->recv_type = IPMI_RESPONSE_RECV_TYPE; -+ spin_lock_irqsave(&intf->counter_lock, flags); -+ intf->handled_lan_responses++; -+ spin_unlock_irqrestore(&intf->counter_lock, flags); -+ deliver_response(recv_msg); -+ -+ return 0; -+} -+ -+static int handle_lan_get_msg_cmd(ipmi_smi_t intf, -+ struct ipmi_smi_msg *msg) -+{ -+ struct cmd_rcvr *rcvr; -+ int rv = 0; -+ unsigned char netfn; -+ unsigned char cmd; -+ unsigned char chan; -+ ipmi_user_t user = NULL; -+ struct ipmi_lan_addr *lan_addr; -+ struct ipmi_recv_msg *recv_msg; -+ unsigned long flags; -+ -+ if (msg->rsp_size < 12) { -+ /* Message not big enough, just ignore it. */ -+ spin_lock_irqsave(&intf->counter_lock, flags); -+ intf->invalid_commands++; -+ spin_unlock_irqrestore(&intf->counter_lock, flags); -+ return 0; -+ } -+ -+ if (msg->rsp[2] != 0) { -+ /* An error getting the response, just ignore it. */ -+ return 0; -+ } -+ -+ netfn = msg->rsp[6] >> 2; -+ cmd = msg->rsp[10]; -+ chan = msg->rsp[3] & 0xf; -+ -+ rcu_read_lock(); -+ rcvr = find_cmd_rcvr(intf, netfn, cmd, chan); -+ if (rcvr) { -+ user = rcvr->user; -+ kref_get(&user->refcount); -+ } else -+ user = NULL; -+ rcu_read_unlock(); -+ -+ if (user == NULL) { -+ /* We didn't find a user, just give up. */ -+ spin_lock_irqsave(&intf->counter_lock, flags); -+ intf->unhandled_commands++; -+ spin_unlock_irqrestore(&intf->counter_lock, flags); -+ -+ rv = 0; /* Don't do anything with these messages, just -+ allow them to be freed. */ -+ } else { -+ /* Deliver the message to the user. */ -+ spin_lock_irqsave(&intf->counter_lock, flags); -+ intf->handled_commands++; -+ spin_unlock_irqrestore(&intf->counter_lock, flags); -+ -+ recv_msg = ipmi_alloc_recv_msg(); -+ if (!recv_msg) { -+ /* We couldn't allocate memory for the -+ message, so requeue it for handling -+ later. */ -+ rv = 1; -+ kref_put(&user->refcount, free_user); -+ } else { -+ /* Extract the source address from the data. 
*/ -+ lan_addr = (struct ipmi_lan_addr *) &recv_msg->addr; -+ lan_addr->addr_type = IPMI_LAN_ADDR_TYPE; -+ lan_addr->session_handle = msg->rsp[4]; -+ lan_addr->remote_SWID = msg->rsp[8]; -+ lan_addr->local_SWID = msg->rsp[5]; -+ lan_addr->lun = msg->rsp[9] & 3; -+ lan_addr->channel = msg->rsp[3] & 0xf; -+ lan_addr->privilege = msg->rsp[3] >> 4; -+ -+ /* Extract the rest of the message information -+ from the IPMB header.*/ -+ recv_msg->user = user; -+ recv_msg->recv_type = IPMI_CMD_RECV_TYPE; -+ recv_msg->msgid = msg->rsp[9] >> 2; -+ recv_msg->msg.netfn = msg->rsp[6] >> 2; -+ recv_msg->msg.cmd = msg->rsp[10]; -+ recv_msg->msg.data = recv_msg->msg_data; -+ -+ /* We chop off 12, not 11 bytes because the checksum -+ at the end also needs to be removed. */ -+ recv_msg->msg.data_len = msg->rsp_size - 12; -+ memcpy(recv_msg->msg_data, -+ &(msg->rsp[11]), -+ msg->rsp_size - 12); -+ deliver_response(recv_msg); -+ } -+ } -+ -+ return rv; -+} -+ -+static void copy_event_into_recv_msg(struct ipmi_recv_msg *recv_msg, -+ struct ipmi_smi_msg *msg) -+{ -+ struct ipmi_system_interface_addr *smi_addr; -+ -+ recv_msg->msgid = 0; -+ smi_addr = (struct ipmi_system_interface_addr *) &(recv_msg->addr); -+ smi_addr->addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; -+ smi_addr->channel = IPMI_BMC_CHANNEL; -+ smi_addr->lun = msg->rsp[0] & 3; -+ recv_msg->recv_type = IPMI_ASYNC_EVENT_RECV_TYPE; -+ recv_msg->msg.netfn = msg->rsp[0] >> 2; -+ recv_msg->msg.cmd = msg->rsp[1]; -+ memcpy(recv_msg->msg_data, &(msg->rsp[3]), msg->rsp_size - 3); -+ recv_msg->msg.data = recv_msg->msg_data; -+ recv_msg->msg.data_len = msg->rsp_size - 3; -+} -+ -+static int handle_read_event_rsp(ipmi_smi_t intf, -+ struct ipmi_smi_msg *msg) -+{ -+ struct ipmi_recv_msg *recv_msg, *recv_msg2; -+ struct list_head msgs; -+ ipmi_user_t user; -+ int rv = 0; -+ int deliver_count = 0; -+ unsigned long flags; -+ -+ if (msg->rsp_size < 19) { -+ /* Message is too small to be an IPMB event. */ -+ spin_lock_irqsave(&intf->counter_lock, flags); -+ intf->invalid_events++; -+ spin_unlock_irqrestore(&intf->counter_lock, flags); -+ return 0; -+ } -+ -+ if (msg->rsp[2] != 0) { -+ /* An error getting the event, just ignore it. */ -+ return 0; -+ } -+ -+ INIT_LIST_HEAD(&msgs); -+ -+ spin_lock_irqsave(&intf->events_lock, flags); -+ -+ spin_lock(&intf->counter_lock); -+ intf->events++; -+ spin_unlock(&intf->counter_lock); -+ -+ /* Allocate and fill in one message for every user that is getting -+ events. */ -+ rcu_read_lock(); -+ list_for_each_entry_rcu(user, &intf->users, link) { -+ if (!user->gets_events) -+ continue; -+ -+ recv_msg = ipmi_alloc_recv_msg(); -+ if (!recv_msg) { -+ rcu_read_unlock(); -+ list_for_each_entry_safe(recv_msg, recv_msg2, &msgs, -+ link) { -+ list_del(&recv_msg->link); -+ ipmi_free_recv_msg(recv_msg); -+ } -+ /* We couldn't allocate memory for the -+ message, so requeue it for handling -+ later. */ -+ rv = 1; -+ goto out; -+ } -+ -+ deliver_count++; -+ -+ copy_event_into_recv_msg(recv_msg, msg); -+ recv_msg->user = user; -+ kref_get(&user->refcount); -+ list_add_tail(&(recv_msg->link), &msgs); -+ } -+ rcu_read_unlock(); -+ -+ if (deliver_count) { -+ /* Now deliver all the messages. */ -+ list_for_each_entry_safe(recv_msg, recv_msg2, &msgs, link) { -+ list_del(&recv_msg->link); -+ deliver_response(recv_msg); -+ } -+ } else if (intf->waiting_events_count < MAX_EVENTS_IN_QUEUE) { -+ /* No one to receive the message, put it in queue if there's -+ not already too many things in the queue. 
*/ -+ recv_msg = ipmi_alloc_recv_msg(); -+ if (!recv_msg) { -+ /* We couldn't allocate memory for the -+ message, so requeue it for handling -+ later. */ -+ rv = 1; -+ goto out; -+ } -+ -+ copy_event_into_recv_msg(recv_msg, msg); -+ list_add_tail(&(recv_msg->link), &(intf->waiting_events)); -+ intf->waiting_events_count++; -+ } else { -+ /* There's too many things in the queue, discard this -+ message. */ -+ printk(KERN_WARNING PFX "Event queue full, discarding an" -+ " incoming event\n"); -+ } -+ -+ out: -+ spin_unlock_irqrestore(&(intf->events_lock), flags); -+ -+ return rv; -+} -+ -+static int handle_bmc_rsp(ipmi_smi_t intf, -+ struct ipmi_smi_msg *msg) -+{ -+ struct ipmi_recv_msg *recv_msg; -+ unsigned long flags; -+ struct ipmi_user *user; -+ -+ recv_msg = (struct ipmi_recv_msg *) msg->user_data; -+ if (recv_msg == NULL) -+ { -+ printk(KERN_WARNING"IPMI message received with no owner. This\n" -+ "could be because of a malformed message, or\n" -+ "because of a hardware error. Contact your\n" -+ "hardware vender for assistance\n"); -+ return 0; -+ } -+ -+ user = recv_msg->user; -+ /* Make sure the user still exists. */ -+ if (user && !user->valid) { -+ /* The user for the message went away, so give up. */ -+ spin_lock_irqsave(&intf->counter_lock, flags); -+ intf->unhandled_local_responses++; -+ spin_unlock_irqrestore(&intf->counter_lock, flags); -+ ipmi_free_recv_msg(recv_msg); -+ } else { -+ struct ipmi_system_interface_addr *smi_addr; -+ -+ spin_lock_irqsave(&intf->counter_lock, flags); -+ intf->handled_local_responses++; -+ spin_unlock_irqrestore(&intf->counter_lock, flags); -+ recv_msg->recv_type = IPMI_RESPONSE_RECV_TYPE; -+ recv_msg->msgid = msg->msgid; -+ smi_addr = ((struct ipmi_system_interface_addr *) -+ &(recv_msg->addr)); -+ smi_addr->addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; -+ smi_addr->channel = IPMI_BMC_CHANNEL; -+ smi_addr->lun = msg->rsp[0] & 3; -+ recv_msg->msg.netfn = msg->rsp[0] >> 2; -+ recv_msg->msg.cmd = msg->rsp[1]; -+ memcpy(recv_msg->msg_data, -+ &(msg->rsp[2]), -+ msg->rsp_size - 2); -+ recv_msg->msg.data = recv_msg->msg_data; -+ recv_msg->msg.data_len = msg->rsp_size - 2; -+ deliver_response(recv_msg); -+ } -+ -+ return 0; -+} -+ -+/* Handle a new message. Return 1 if the message should be requeued, -+ 0 if the message should be freed, or -1 if the message should not -+ be freed or requeued. */ -+static int handle_new_recv_msg(ipmi_smi_t intf, -+ struct ipmi_smi_msg *msg) -+{ -+ int requeue; -+ int chan; -+ -+#ifdef DEBUG_MSGING -+ int m; -+ printk("Recv:"); -+ for (m = 0; m < msg->rsp_size; m++) -+ printk(" %2.2x", msg->rsp[m]); -+ printk("\n"); -+#endif -+ if (msg->rsp_size < 2) { -+ /* Message is too small to be correct. */ -+ printk(KERN_WARNING PFX "BMC returned to small a message" -+ " for netfn %x cmd %x, got %d bytes\n", -+ (msg->data[0] >> 2) | 1, msg->data[1], msg->rsp_size); -+ -+ /* Generate an error response for the message. */ -+ msg->rsp[0] = msg->data[0] | (1 << 2); -+ msg->rsp[1] = msg->data[1]; -+ msg->rsp[2] = IPMI_ERR_UNSPECIFIED; -+ msg->rsp_size = 3; -+ } else if (((msg->rsp[0] >> 2) != ((msg->data[0] >> 2) | 1))/* Netfn */ -+ || (msg->rsp[1] != msg->data[1])) /* Command */ -+ { -+ /* The response is not even marginally correct. */ -+ printk(KERN_WARNING PFX "BMC returned incorrect response," -+ " expected netfn %x cmd %x, got netfn %x cmd %x\n", -+ (msg->data[0] >> 2) | 1, msg->data[1], -+ msg->rsp[0] >> 2, msg->rsp[1]); -+ -+ /* Generate an error response for the message. 
*/ -+ msg->rsp[0] = msg->data[0] | (1 << 2); -+ msg->rsp[1] = msg->data[1]; -+ msg->rsp[2] = IPMI_ERR_UNSPECIFIED; -+ msg->rsp_size = 3; -+ } -+ -+ if ((msg->rsp[0] == ((IPMI_NETFN_APP_REQUEST|1) << 2)) -+ && (msg->rsp[1] == IPMI_SEND_MSG_CMD) -+ && (msg->user_data != NULL)) -+ { -+ /* It's a response to a response we sent. For this we -+ deliver a send message response to the user. */ -+ struct ipmi_recv_msg *recv_msg = msg->user_data; -+ -+ requeue = 0; -+ if (msg->rsp_size < 2) -+ /* Message is too small to be correct. */ -+ goto out; -+ -+ chan = msg->data[2] & 0x0f; -+ if (chan >= IPMI_MAX_CHANNELS) -+ /* Invalid channel number */ -+ goto out; -+ -+ if (!recv_msg) -+ goto out; -+ -+ /* Make sure the user still exists. */ -+ if (!recv_msg->user || !recv_msg->user->valid) -+ goto out; -+ -+ recv_msg->recv_type = IPMI_RESPONSE_RESPONSE_TYPE; -+ recv_msg->msg.data = recv_msg->msg_data; -+ recv_msg->msg.data_len = 1; -+ recv_msg->msg_data[0] = msg->rsp[2]; -+ deliver_response(recv_msg); -+ } else if ((msg->rsp[0] == ((IPMI_NETFN_APP_REQUEST|1) << 2)) -+ && (msg->rsp[1] == IPMI_GET_MSG_CMD)) -+ { -+ /* It's from the receive queue. */ -+ chan = msg->rsp[3] & 0xf; -+ if (chan >= IPMI_MAX_CHANNELS) { -+ /* Invalid channel number */ -+ requeue = 0; -+ goto out; -+ } -+ -+ switch (intf->channels[chan].medium) { -+ case IPMI_CHANNEL_MEDIUM_IPMB: -+ if (msg->rsp[4] & 0x04) { -+ /* It's a response, so find the -+ requesting message and send it up. */ -+ requeue = handle_ipmb_get_msg_rsp(intf, msg); -+ } else { -+ /* It's a command to the SMS from some other -+ entity. Handle that. */ -+ requeue = handle_ipmb_get_msg_cmd(intf, msg); -+ } -+ break; -+ -+ case IPMI_CHANNEL_MEDIUM_8023LAN: -+ case IPMI_CHANNEL_MEDIUM_ASYNC: -+ if (msg->rsp[6] & 0x04) { -+ /* It's a response, so find the -+ requesting message and send it up. */ -+ requeue = handle_lan_get_msg_rsp(intf, msg); -+ } else { -+ /* It's a command to the SMS from some other -+ entity. Handle that. */ -+ requeue = handle_lan_get_msg_cmd(intf, msg); -+ } -+ break; -+ -+ default: -+ /* We don't handle the channel type, so just -+ * free the message. */ -+ requeue = 0; -+ } -+ -+ } else if ((msg->rsp[0] == ((IPMI_NETFN_APP_REQUEST|1) << 2)) -+ && (msg->rsp[1] == IPMI_READ_EVENT_MSG_BUFFER_CMD)) -+ { -+ /* It's an asyncronous event. */ -+ requeue = handle_read_event_rsp(intf, msg); -+ } else { -+ /* It's a response from the local BMC. */ -+ requeue = handle_bmc_rsp(intf, msg); -+ } -+ -+ out: -+ return requeue; -+} -+ -+/* Handle a new message from the lower layer. */ -+void ipmi_smi_msg_received(ipmi_smi_t intf, -+ struct ipmi_smi_msg *msg) -+{ -+ unsigned long flags; -+ int rv; -+ -+ -+ if ((msg->data_size >= 2) -+ && (msg->data[0] == (IPMI_NETFN_APP_REQUEST << 2)) -+ && (msg->data[1] == IPMI_SEND_MSG_CMD) -+ && (msg->user_data == NULL)) -+ { -+ /* This is the local response to a command send, start -+ the timer for these. The user_data will not be -+ NULL if this is a response send, and we will let -+ response sends just go through. */ -+ -+ /* Check for errors, if we get certain errors (ones -+ that mean basically we can try again later), we -+ ignore them and start the timer. Otherwise we -+ report the error immediately. */ -+ if ((msg->rsp_size >= 3) && (msg->rsp[2] != 0) -+ && (msg->rsp[2] != IPMI_NODE_BUSY_ERR) -+ && (msg->rsp[2] != IPMI_LOST_ARBITRATION_ERR) -+ && (msg->rsp[2] != IPMI_BUS_ERR) -+ && (msg->rsp[2] != IPMI_NAK_ON_WRITE_ERR)) -+ { -+ int chan = msg->rsp[3] & 0xf; -+ -+ /* Got an error sending the message, handle it. 
*/ -+ spin_lock_irqsave(&intf->counter_lock, flags); -+ if (chan >= IPMI_MAX_CHANNELS) -+ ; /* This shouldn't happen */ -+ else if ((intf->channels[chan].medium -+ == IPMI_CHANNEL_MEDIUM_8023LAN) -+ || (intf->channels[chan].medium -+ == IPMI_CHANNEL_MEDIUM_ASYNC)) -+ intf->sent_lan_command_errs++; -+ else -+ intf->sent_ipmb_command_errs++; -+ spin_unlock_irqrestore(&intf->counter_lock, flags); -+ intf_err_seq(intf, msg->msgid, msg->rsp[2]); -+ } else { -+ /* The message was sent, start the timer. */ -+ intf_start_seq_timer(intf, msg->msgid); -+ } -+ -+ ipmi_free_smi_msg(msg); -+ goto out; -+ } -+ -+ /* To preserve message order, if the list is not empty, we -+ tack this message onto the end of the list. */ -+ spin_lock_irqsave(&intf->waiting_msgs_lock, flags); -+ if (!list_empty(&intf->waiting_msgs)) { -+ list_add_tail(&msg->link, &intf->waiting_msgs); -+ spin_unlock_irqrestore(&intf->waiting_msgs_lock, flags); -+ goto out; -+ } -+ spin_unlock_irqrestore(&intf->waiting_msgs_lock, flags); -+ -+ rv = handle_new_recv_msg(intf, msg); -+ if (rv > 0) { -+ /* Could not handle the message now, just add it to a -+ list to handle later. */ -+ spin_lock_irqsave(&intf->waiting_msgs_lock, flags); -+ list_add_tail(&msg->link, &intf->waiting_msgs); -+ spin_unlock_irqrestore(&intf->waiting_msgs_lock, flags); -+ } else if (rv == 0) { -+ ipmi_free_smi_msg(msg); -+ } -+ -+ out: -+ return; -+} -+ -+void ipmi_smi_watchdog_pretimeout(ipmi_smi_t intf) -+{ -+ ipmi_user_t user; -+ -+ rcu_read_lock(); -+ list_for_each_entry_rcu(user, &intf->users, link) { -+ if (!user->handler->ipmi_watchdog_pretimeout) -+ continue; -+ -+ user->handler->ipmi_watchdog_pretimeout(user->handler_data); -+ } -+ rcu_read_unlock(); -+} -+ -+ -+static struct ipmi_smi_msg * -+smi_from_recv_msg(ipmi_smi_t intf, struct ipmi_recv_msg *recv_msg, -+ unsigned char seq, long seqid) -+{ -+ struct ipmi_smi_msg *smi_msg = ipmi_alloc_smi_msg(); -+ if (!smi_msg) -+ /* If we can't allocate the message, then just return, we -+ get 4 retries, so this should be ok. */ -+ return NULL; -+ -+ memcpy(smi_msg->data, recv_msg->msg.data, recv_msg->msg.data_len); -+ smi_msg->data_size = recv_msg->msg.data_len; -+ smi_msg->msgid = STORE_SEQ_IN_MSGID(seq, seqid); -+ -+#ifdef DEBUG_MSGING -+ { -+ int m; -+ printk("Resend: "); -+ for (m = 0; m < smi_msg->data_size; m++) -+ printk(" %2.2x", smi_msg->data[m]); -+ printk("\n"); -+ } -+#endif -+ return smi_msg; -+} -+ -+static void check_msg_timeout(ipmi_smi_t intf, struct seq_table *ent, -+ struct list_head *timeouts, long timeout_period, -+ int slot, unsigned long *flags) -+{ -+ struct ipmi_recv_msg *msg; -+ struct ipmi_smi_handlers *handlers; -+ -+ if (intf->intf_num == -1) -+ return; -+ -+ if (!ent->inuse) -+ return; -+ -+ ent->timeout -= timeout_period; -+ if (ent->timeout > 0) -+ return; -+ -+ if (ent->retries_left == 0) { -+ /* The message has used all its retries. */ -+ ent->inuse = 0; -+ msg = ent->recv_msg; -+ list_add_tail(&msg->link, timeouts); -+ spin_lock(&intf->counter_lock); -+ if (ent->broadcast) -+ intf->timed_out_ipmb_broadcasts++; -+ else if (ent->recv_msg->addr.addr_type == IPMI_LAN_ADDR_TYPE) -+ intf->timed_out_lan_commands++; -+ else -+ intf->timed_out_ipmb_commands++; -+ spin_unlock(&intf->counter_lock); -+ } else { -+ struct ipmi_smi_msg *smi_msg; -+ /* More retries, send again. */ -+ -+ /* Start with the max timer, set to normal -+ timer after the message is sent. 
*/ -+ ent->timeout = MAX_MSG_TIMEOUT; -+ ent->retries_left--; -+ spin_lock(&intf->counter_lock); -+ if (ent->recv_msg->addr.addr_type == IPMI_LAN_ADDR_TYPE) -+ intf->retransmitted_lan_commands++; -+ else -+ intf->retransmitted_ipmb_commands++; -+ spin_unlock(&intf->counter_lock); -+ -+ smi_msg = smi_from_recv_msg(intf, ent->recv_msg, slot, -+ ent->seqid); -+ if (!smi_msg) -+ return; -+ -+ spin_unlock_irqrestore(&intf->seq_lock, *flags); -+ -+ /* Send the new message. We send with a zero -+ * priority. It timed out, I doubt time is -+ * that critical now, and high priority -+ * messages are really only for messages to the -+ * local MC, which don't get resent. */ -+ handlers = intf->handlers; -+ if (handlers) -+ intf->handlers->sender(intf->send_info, -+ smi_msg, 0); -+ else -+ ipmi_free_smi_msg(smi_msg); -+ -+ spin_lock_irqsave(&intf->seq_lock, *flags); -+ } -+} -+ -+static void ipmi_timeout_handler(long timeout_period) -+{ -+ ipmi_smi_t intf; -+ struct list_head timeouts; -+ struct ipmi_recv_msg *msg, *msg2; -+ struct ipmi_smi_msg *smi_msg, *smi_msg2; -+ unsigned long flags; -+ int i; -+ -+ rcu_read_lock(); -+ list_for_each_entry_rcu(intf, &ipmi_interfaces, link) { -+ /* See if any waiting messages need to be processed. */ -+ spin_lock_irqsave(&intf->waiting_msgs_lock, flags); -+ list_for_each_entry_safe(smi_msg, smi_msg2, -+ &intf->waiting_msgs, link) { -+ if (!handle_new_recv_msg(intf, smi_msg)) { -+ list_del(&smi_msg->link); -+ ipmi_free_smi_msg(smi_msg); -+ } else { -+ /* To preserve message order, quit if we -+ can't handle a message. */ -+ break; -+ } -+ } -+ spin_unlock_irqrestore(&intf->waiting_msgs_lock, flags); -+ -+ /* Go through the seq table and find any messages that -+ have timed out, putting them in the timeouts -+ list. */ -+ INIT_LIST_HEAD(&timeouts); -+ spin_lock_irqsave(&intf->seq_lock, flags); -+ for (i = 0; i < IPMI_IPMB_NUM_SEQ; i++) -+ check_msg_timeout(intf, &(intf->seq_table[i]), -+ &timeouts, timeout_period, i, -+ &flags); -+ spin_unlock_irqrestore(&intf->seq_lock, flags); -+ -+ list_for_each_entry_safe(msg, msg2, &timeouts, link) -+ deliver_err_response(msg, IPMI_TIMEOUT_COMPLETION_CODE); -+ -+ /* -+ * Maintenance mode handling. Check the timeout -+ * optimistically before we claim the lock. It may -+ * mean a timeout gets missed occasionally, but that -+ * only means the timeout gets extended by one period -+ * in that case. No big deal, and it avoids the lock -+ * most of the time. -+ */ -+ if (intf->auto_maintenance_timeout > 0) { -+ spin_lock_irqsave(&intf->maintenance_mode_lock, flags); -+ if (intf->auto_maintenance_timeout > 0) { -+ intf->auto_maintenance_timeout -+ -= timeout_period; -+ if (!intf->maintenance_mode -+ && (intf->auto_maintenance_timeout <= 0)) -+ { -+ intf->maintenance_mode_enable = 0; -+ maintenance_mode_update(intf); -+ } -+ } -+ spin_unlock_irqrestore(&intf->maintenance_mode_lock, -+ flags); -+ } -+ } -+ rcu_read_unlock(); -+} -+ -+static void ipmi_request_event(void) -+{ -+ ipmi_smi_t intf; -+ struct ipmi_smi_handlers *handlers; -+ -+ rcu_read_lock(); -+ /* Called from the timer, no need to check if handlers is -+ * valid. */ -+ list_for_each_entry_rcu(intf, &ipmi_interfaces, link) { -+ /* No event requests when in maintenance mode. */ -+ if (intf->maintenance_mode_enable) -+ continue; -+ -+ handlers = intf->handlers; -+ if (handlers) -+ handlers->request_events(intf->send_info); -+ } -+ rcu_read_unlock(); -+} -+ -+static struct timer_list ipmi_timer; -+ -+/* Call every ~100 ms. 
*/ -+#define IPMI_TIMEOUT_TIME 100 -+ -+/* How many jiffies does it take to get to the timeout time. */ -+#define IPMI_TIMEOUT_JIFFIES ((IPMI_TIMEOUT_TIME * HZ) / 1000) -+ -+/* Request events from the queue every second (this is the number of -+ IPMI_TIMEOUT_TIMES between event requests). Hopefully, in the -+ future, IPMI will add a way to know immediately if an event is in -+ the queue and this silliness can go away. */ -+#define IPMI_REQUEST_EV_TIME (1000 / (IPMI_TIMEOUT_TIME)) -+ -+static atomic_t stop_operation; -+static unsigned int ticks_to_req_ev = IPMI_REQUEST_EV_TIME; -+ -+static void ipmi_timeout(unsigned long data) -+{ -+ if (atomic_read(&stop_operation)) -+ return; -+ -+ ticks_to_req_ev--; -+ if (ticks_to_req_ev == 0) { -+ ipmi_request_event(); -+ ticks_to_req_ev = IPMI_REQUEST_EV_TIME; -+ } -+ -+ ipmi_timeout_handler(IPMI_TIMEOUT_TIME); -+ -+ mod_timer(&ipmi_timer, jiffies + IPMI_TIMEOUT_JIFFIES); -+} -+ -+ -+static atomic_t smi_msg_inuse_count = ATOMIC_INIT(0); -+static atomic_t recv_msg_inuse_count = ATOMIC_INIT(0); -+ -+/* FIXME - convert these to slabs. */ -+static void free_smi_msg(struct ipmi_smi_msg *msg) -+{ -+ atomic_dec(&smi_msg_inuse_count); -+ kfree(msg); -+} -+ -+struct ipmi_smi_msg *ipmi_alloc_smi_msg(void) -+{ -+ struct ipmi_smi_msg *rv; -+ rv = kmalloc(sizeof(struct ipmi_smi_msg), GFP_ATOMIC); -+ if (rv) { -+ rv->done = free_smi_msg; -+ rv->user_data = NULL; -+ atomic_inc(&smi_msg_inuse_count); -+ } -+ return rv; -+} -+ -+static void free_recv_msg(struct ipmi_recv_msg *msg) -+{ -+ atomic_dec(&recv_msg_inuse_count); -+ kfree(msg); -+} -+ -+struct ipmi_recv_msg *ipmi_alloc_recv_msg(void) -+{ -+ struct ipmi_recv_msg *rv; -+ -+ rv = kmalloc(sizeof(struct ipmi_recv_msg), GFP_ATOMIC); -+ if (rv) { -+ rv->user = NULL; -+ rv->done = free_recv_msg; -+ atomic_inc(&recv_msg_inuse_count); -+ } -+ return rv; -+} -+ -+void ipmi_free_recv_msg(struct ipmi_recv_msg *msg) -+{ -+ if (msg->user) -+ kref_put(&msg->user->refcount, free_user); -+ msg->done(msg); -+} -+ -+#ifdef CONFIG_IPMI_PANIC_EVENT -+ -+static void dummy_smi_done_handler(struct ipmi_smi_msg *msg) -+{ -+} -+ -+static void dummy_recv_done_handler(struct ipmi_recv_msg *msg) -+{ -+} -+ -+#ifdef CONFIG_IPMI_PANIC_STRING -+static void event_receiver_fetcher(ipmi_smi_t intf, struct ipmi_recv_msg *msg) -+{ -+ if ((msg->addr.addr_type == IPMI_SYSTEM_INTERFACE_ADDR_TYPE) -+ && (msg->msg.netfn == IPMI_NETFN_SENSOR_EVENT_RESPONSE) -+ && (msg->msg.cmd == IPMI_GET_EVENT_RECEIVER_CMD) -+ && (msg->msg.data[0] == IPMI_CC_NO_ERROR)) -+ { -+ /* A get event receiver command, save it. */ -+ intf->event_receiver = msg->msg.data[1]; -+ intf->event_receiver_lun = msg->msg.data[2] & 0x3; -+ } -+} -+ -+static void device_id_fetcher(ipmi_smi_t intf, struct ipmi_recv_msg *msg) -+{ -+ if ((msg->addr.addr_type == IPMI_SYSTEM_INTERFACE_ADDR_TYPE) -+ && (msg->msg.netfn == IPMI_NETFN_APP_RESPONSE) -+ && (msg->msg.cmd == IPMI_GET_DEVICE_ID_CMD) -+ && (msg->msg.data[0] == IPMI_CC_NO_ERROR)) -+ { -+ /* A get device id command, save if we are an event -+ receiver or generator. 
*/ -+ intf->local_sel_device = (msg->msg.data[6] >> 2) & 1; -+ intf->local_event_generator = (msg->msg.data[6] >> 5) & 1; -+ } -+} -+#endif -+ -+static void send_panic_events(char *str) -+{ -+ struct kernel_ipmi_msg msg; -+ ipmi_smi_t intf; -+ unsigned char data[16]; -+ struct ipmi_system_interface_addr *si; -+ struct ipmi_addr addr; -+ struct ipmi_smi_msg smi_msg; -+ struct ipmi_recv_msg recv_msg; -+ -+ si = (struct ipmi_system_interface_addr *) &addr; -+ si->addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; -+ si->channel = IPMI_BMC_CHANNEL; -+ si->lun = 0; -+ -+ /* Fill in an event telling that we have failed. */ -+ msg.netfn = 0x04; /* Sensor or Event. */ -+ msg.cmd = 2; /* Platform event command. */ -+ msg.data = data; -+ msg.data_len = 8; -+ data[0] = 0x41; /* Kernel generator ID, IPMI table 5-4 */ -+ data[1] = 0x03; /* This is for IPMI 1.0. */ -+ data[2] = 0x20; /* OS Critical Stop, IPMI table 36-3 */ -+ data[4] = 0x6f; /* Sensor specific, IPMI table 36-1 */ -+ data[5] = 0xa1; /* Runtime stop OEM bytes 2 & 3. */ -+ -+ /* Put a few breadcrumbs in. Hopefully later we can add more things -+ to make the panic events more useful. */ -+ if (str) { -+ data[3] = str[0]; -+ data[6] = str[1]; -+ data[7] = str[2]; -+ } -+ -+ smi_msg.done = dummy_smi_done_handler; -+ recv_msg.done = dummy_recv_done_handler; -+ -+ /* For every registered interface, send the event. */ -+ list_for_each_entry_rcu(intf, &ipmi_interfaces, link) { -+ if (!intf->handlers) -+ /* Interface is not ready. */ -+ continue; -+ -+ /* Send the event announcing the panic. */ -+ intf->handlers->set_run_to_completion(intf->send_info, 1); -+ i_ipmi_request(NULL, -+ intf, -+ &addr, -+ 0, -+ &msg, -+ intf, -+ &smi_msg, -+ &recv_msg, -+ 0, -+ intf->channels[0].address, -+ intf->channels[0].lun, -+ 0, 1); /* Don't retry, and don't wait. */ -+ } -+ -+#ifdef CONFIG_IPMI_PANIC_STRING -+ /* On every interface, dump a bunch of OEM event holding the -+ string. */ -+ if (!str) -+ return; -+ -+ /* For every registered interface, send the event. */ -+ list_for_each_entry_rcu(intf, &ipmi_interfaces, link) { -+ char *p = str; -+ struct ipmi_ipmb_addr *ipmb; -+ int j; -+ -+ if (intf->intf_num == -1) -+ /* Interface was not ready yet. */ -+ continue; -+ -+ /* First job here is to figure out where to send the -+ OEM events. There's no way in IPMI to send OEM -+ events using an event send command, so we have to -+ find the SEL to put them in and stick them in -+ there. */ -+ -+ /* Get capabilities from the get device id. */ -+ intf->local_sel_device = 0; -+ intf->local_event_generator = 0; -+ intf->event_receiver = 0; -+ -+ /* Request the device info from the local MC. */ -+ msg.netfn = IPMI_NETFN_APP_REQUEST; -+ msg.cmd = IPMI_GET_DEVICE_ID_CMD; -+ msg.data = NULL; -+ msg.data_len = 0; -+ intf->null_user_handler = device_id_fetcher; -+ i_ipmi_request(NULL, -+ intf, -+ &addr, -+ 0, -+ &msg, -+ intf, -+ &smi_msg, -+ &recv_msg, -+ 0, -+ intf->channels[0].address, -+ intf->channels[0].lun, -+ 0, 1); /* Don't retry, and don't wait. */ -+ -+ if (intf->local_event_generator) { -+ /* Request the event receiver from the local MC. */ -+ msg.netfn = IPMI_NETFN_SENSOR_EVENT_REQUEST; -+ msg.cmd = IPMI_GET_EVENT_RECEIVER_CMD; -+ msg.data = NULL; -+ msg.data_len = 0; -+ intf->null_user_handler = event_receiver_fetcher; -+ i_ipmi_request(NULL, -+ intf, -+ &addr, -+ 0, -+ &msg, -+ intf, -+ &smi_msg, -+ &recv_msg, -+ 0, -+ intf->channels[0].address, -+ intf->channels[0].lun, -+ 0, 1); /* no retry, and no wait. 
*/ -+ } -+ intf->null_user_handler = NULL; -+ -+ /* Validate the event receiver. The low bit must not -+ be 1 (it must be a valid IPMB address), it cannot -+ be zero, and it must not be my address. */ -+ if (((intf->event_receiver & 1) == 0) -+ && (intf->event_receiver != 0) -+ && (intf->event_receiver != intf->channels[0].address)) -+ { -+ /* The event receiver is valid, send an IPMB -+ message. */ -+ ipmb = (struct ipmi_ipmb_addr *) &addr; -+ ipmb->addr_type = IPMI_IPMB_ADDR_TYPE; -+ ipmb->channel = 0; /* FIXME - is this right? */ -+ ipmb->lun = intf->event_receiver_lun; -+ ipmb->slave_addr = intf->event_receiver; -+ } else if (intf->local_sel_device) { -+ /* The event receiver was not valid (or was -+ me), but I am an SEL device, just dump it -+ in my SEL. */ -+ si = (struct ipmi_system_interface_addr *) &addr; -+ si->addr_type = IPMI_SYSTEM_INTERFACE_ADDR_TYPE; -+ si->channel = IPMI_BMC_CHANNEL; -+ si->lun = 0; -+ } else -+ continue; /* No where to send the event. */ -+ -+ -+ msg.netfn = IPMI_NETFN_STORAGE_REQUEST; /* Storage. */ -+ msg.cmd = IPMI_ADD_SEL_ENTRY_CMD; -+ msg.data = data; -+ msg.data_len = 16; -+ -+ j = 0; -+ while (*p) { -+ int size = strlen(p); -+ -+ if (size > 11) -+ size = 11; -+ data[0] = 0; -+ data[1] = 0; -+ data[2] = 0xf0; /* OEM event without timestamp. */ -+ data[3] = intf->channels[0].address; -+ data[4] = j++; /* sequence # */ -+ /* Always give 11 bytes, so strncpy will fill -+ it with zeroes for me. */ -+ strncpy(data+5, p, 11); -+ p += size; -+ -+ i_ipmi_request(NULL, -+ intf, -+ &addr, -+ 0, -+ &msg, -+ intf, -+ &smi_msg, -+ &recv_msg, -+ 0, -+ intf->channels[0].address, -+ intf->channels[0].lun, -+ 0, 1); /* no retry, and no wait. */ -+ } -+ } -+#endif /* CONFIG_IPMI_PANIC_STRING */ -+} -+#endif /* CONFIG_IPMI_PANIC_EVENT */ -+ -+static int has_panicked; -+ -+static int panic_event(struct notifier_block *this, -+ unsigned long event, -+ void *ptr) -+{ -+ ipmi_smi_t intf; -+ -+ if (has_panicked) -+ return NOTIFY_DONE; -+ has_panicked = 1; -+ -+ /* For every registered interface, set it to run to completion. */ -+ list_for_each_entry_rcu(intf, &ipmi_interfaces, link) { -+ if (!intf->handlers) -+ /* Interface is not ready. 
*/ -+ continue; -+ -+ intf->handlers->set_run_to_completion(intf->send_info, 1); -+ } -+ -+#ifdef CONFIG_IPMI_PANIC_EVENT -+ send_panic_events(ptr); -+#endif -+ -+ return NOTIFY_DONE; -+} -+ -+static struct notifier_block panic_block = { -+ .notifier_call = panic_event, -+ .next = NULL, -+ .priority = 200 /* priority: INT_MAX >= x >= 0 */ -+}; -+ -+static int ipmi_init_msghandler(void) -+{ -+ int rv; -+ -+ if (initialized) -+ return 0; -+ -+ rv = driver_register(&ipmidriver); -+ if (rv) { -+ printk(KERN_ERR PFX "Could not register IPMI driver\n"); -+ return rv; -+ } -+ -+ printk(KERN_INFO "ipmi message handler version " -+ IPMI_DRIVER_VERSION "\n"); -+ -+#ifdef CONFIG_PROC_FS -+ proc_ipmi_root = proc_mkdir("ipmi", NULL); -+ if (!proc_ipmi_root) { -+ printk(KERN_ERR PFX "Unable to create IPMI proc dir"); -+ return -ENOMEM; -+ } -+ -+ proc_ipmi_root->owner = THIS_MODULE; -+#endif /* CONFIG_PROC_FS */ -+ -+ setup_timer(&ipmi_timer, ipmi_timeout, 0); -+ mod_timer(&ipmi_timer, jiffies + IPMI_TIMEOUT_JIFFIES); -+ -+ atomic_notifier_chain_register(&panic_notifier_list, &panic_block); -+ -+ initialized = 1; -+ -+ return 0; -+} -+ -+static __init int ipmi_init_msghandler_mod(void) -+{ -+ ipmi_init_msghandler(); -+ return 0; -+} -+ -+static __exit void cleanup_ipmi(void) -+{ -+ int count; -+ -+ if (!initialized) -+ return; -+ -+ atomic_notifier_chain_unregister(&panic_notifier_list, &panic_block); -+ -+ /* This can't be called if any interfaces exist, so no worry about -+ shutting down the interfaces. */ -+ -+ /* Tell the timer to stop, then wait for it to stop. This avoids -+ problems with race conditions removing the timer here. */ -+ atomic_inc(&stop_operation); -+ del_timer_sync(&ipmi_timer); -+ -+#ifdef CONFIG_PROC_FS -+ remove_proc_entry(proc_ipmi_root->name, &proc_root); -+#endif /* CONFIG_PROC_FS */ -+ -+ driver_unregister(&ipmidriver); -+ -+ initialized = 0; -+ -+ /* Check for buffer leaks. 
*/ -+ count = atomic_read(&smi_msg_inuse_count); -+ if (count != 0) -+ printk(KERN_WARNING PFX "SMI message count %d at exit\n", -+ count); -+ count = atomic_read(&recv_msg_inuse_count); -+ if (count != 0) -+ printk(KERN_WARNING PFX "recv message count %d at exit\n", -+ count); -+} -+module_exit(cleanup_ipmi); -+ -+module_init(ipmi_init_msghandler_mod); -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Corey Minyard <minyard@mvista.com>"); -+MODULE_DESCRIPTION("Incoming and outgoing message routing for an IPMI interface."); -+MODULE_VERSION(IPMI_DRIVER_VERSION); -+ -+EXPORT_SYMBOL(ipmi_create_user); -+EXPORT_SYMBOL(ipmi_destroy_user); -+EXPORT_SYMBOL(ipmi_get_version); -+EXPORT_SYMBOL(ipmi_request_settime); -+EXPORT_SYMBOL(ipmi_request_supply_msgs); -+EXPORT_SYMBOL(ipmi_register_smi); -+EXPORT_SYMBOL(ipmi_unregister_smi); -+EXPORT_SYMBOL(ipmi_register_for_cmd); -+EXPORT_SYMBOL(ipmi_unregister_for_cmd); -+EXPORT_SYMBOL(ipmi_smi_msg_received); -+EXPORT_SYMBOL(ipmi_smi_watchdog_pretimeout); -+EXPORT_SYMBOL(ipmi_alloc_smi_msg); -+EXPORT_SYMBOL(ipmi_addr_length); -+EXPORT_SYMBOL(ipmi_validate_addr); -+EXPORT_SYMBOL(ipmi_set_gets_events); -+EXPORT_SYMBOL(ipmi_smi_watcher_register); -+EXPORT_SYMBOL(ipmi_smi_watcher_unregister); -+EXPORT_SYMBOL(ipmi_set_my_address); -+EXPORT_SYMBOL(ipmi_get_my_address); -+EXPORT_SYMBOL(ipmi_set_my_LUN); -+EXPORT_SYMBOL(ipmi_get_my_LUN); -+EXPORT_SYMBOL(ipmi_smi_add_proc_entry); -+EXPORT_SYMBOL(ipmi_user_set_run_to_completion); -+EXPORT_SYMBOL(ipmi_free_recv_msg); diff -rduNp linux-2.6.20.3.orig/drivers/hwmon/Kconfig linux-2.6.20.3/drivers/hwmon/Kconfig --- linux-2.6.20.3.orig/drivers/hwmon/Kconfig 2007-03-13 19:27:08.000000000 +0100 +++ linux-2.6.20.3/drivers/hwmon/Kconfig 2007-03-14 14:23:02.000000000 +0100 diff --git a/toolchain/kernel-headers/linux-2.6.21.5-001-lzma-vmlinuz.00.patch b/toolchain/kernel-headers/linux-2.6.21.5-001-lzma-vmlinuz.00.patch new file mode 100644 index 000000000..87e50f8be --- /dev/null +++ b/toolchain/kernel-headers/linux-2.6.21.5-001-lzma-vmlinuz.00.patch @@ -0,0 +1,27017 @@ +diff --git a/.miniconfig b/.miniconfig +new file mode 100644 +index 0000000..5686e53 +--- /dev/null ++++ b/.miniconfig +@@ -0,0 +1,89 @@ ++#make allnoconfig KCONFIG_ALLCONFIG=miniconfig ++CONFIG_X86_32=y ++CONFIG_CLOCKSOURCE_WATCHDOG=y ++CONFIG_LOCKDEP_SUPPORT=y ++CONFIG_SEMAPHORE_SLEEPERS=y ++CONFIG_MMU=y ++CONFIG_GENERIC_ISA_DMA=y ++CONFIG_GENERIC_HWEIGHT=y ++CONFIG_DMI=y ++CONFIG_INIT_ENV_ARG_LIMIT=32 ++CONFIG_IKCONFIG=y ++CONFIG_IKCONFIG_PROC=y ++CONFIG_SYSFS_DEPRECATED=y ++CONFIG_BLK_DEV_INITRD=y ++CONFIG_SYSCTL=y ++CONFIG_EMBEDDED=y ++CONFIG_PRINTK=y ++CONFIG_BASE_SMALL=1 ++CONFIG_BLOCK=y ++CONFIG_IOSCHED_NOOP=y ++CONFIG_DEFAULT_IOSCHED="noop" ++CONFIG_X86_GENERIC=y ++CONFIG_X86_L1_CACHE_SHIFT=7 ++CONFIG_GENERIC_CALIBRATE_DELAY=y ++CONFIG_X86_WP_WORKS_OK=y ++CONFIG_X86_BSWAP=y ++CONFIG_X86_CMPXCHG64=y ++CONFIG_X86_INTEL_USERCOPY=y ++CONFIG_X86_TSC=y ++CONFIG_PREEMPT_NONE=y ++CONFIG_VM86=y ++CONFIG_HIGHMEM=y ++CONFIG_FLATMEM=y ++CONFIG_MTRR=y ++CONFIG_HZ_250=y ++CONFIG_PHYSICAL_ALIGN=0x100000 ++CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y ++CONFIG_PM=y ++CONFIG_ACPI=y ++CONFIG_ACPI_SLEEP=y ++CONFIG_ACPI_BLACKLIST_YEAR=0 ++CONFIG_ACPI_EC=y ++CONFIG_ACPI_SYSTEM=y ++CONFIG_PCI=y ++CONFIG_PCI_GOANY=y ++CONFIG_PCI_DIRECT=y ++CONFIG_BINFMT_ELF=y ++CONFIG_STANDALONE=y ++CONFIG_BLK_DEV_LOOP=y ++CONFIG_IDE=y ++CONFIG_IDE_MAX_HWIFS=2 ++CONFIG_BLK_DEV_IDE=y ++CONFIG_BLK_DEV_IDEDISK=y ++CONFIG_IDEDISK_MULTI_MODE=y ++CONFIG_BLK_DEV_IDECD=y ++CONFIG_IDE_GENERIC=y ++CONFIG_INPUT_MOUSEDEV=y 
++CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 ++CONFIG_INPUT_KEYBOARD=y ++CONFIG_KEYBOARD_ATKBD=y ++CONFIG_SERIO=y ++CONFIG_VT=y ++CONFIG_VT_CONSOLE=y ++CONFIG_UNIX98_PTYS=y ++CONFIG_VGA_CONSOLE=y ++CONFIG_USB_ARCH_HAS_HCD=y ++CONFIG_USB_ARCH_HAS_EHCI=y ++CONFIG_EXT2_FS=y ++CONFIG_DNOTIFY=y ++CONFIG_ISO9660_FS=y ++CONFIG_FAT_FS=y ++CONFIG_VFAT_FS=y ++CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" ++CONFIG_PROC_FS=y ++CONFIG_PROC_SYSCTL=y ++CONFIG_SYSFS=y ++CONFIG_RAMFS=y ++CONFIG_SQUASHFS=y ++CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3 ++CONFIG_MSDOS_PARTITION=y ++CONFIG_NLS_DEFAULT="iso8859-1" ++CONFIG_AUFS=y ++CONFIG_AUFS_FAKE_DM=y ++CONFIG_EARLY_PRINTK=y ++CONFIG_DOUBLEFAULT=y ++CONFIG_ZLIB_INFLATE=y ++CONFIG_HAS_IOPORT=y ++CONFIG_GENERIC_IRQ_PROBE=y ++CONFIG_KTIME_SCALAR=y +diff --git a/Makefile b/Makefile +index d970cb1..a369204 100644 +--- a/Makefile ++++ b/Makefile +@@ -188,7 +188,7 @@ CROSS_COMPILE ?= + # Architecture as present in compile.h + UTS_MACHINE := $(ARCH) + +-KCONFIG_CONFIG ?= .config ++KCONFIG_CONFIG ?= .miniconfig + + # SHELL used by kbuild + CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \ +diff --git a/arch/i386/boot/compressed/LzmaDecode.c b/arch/i386/boot/compressed/LzmaDecode.c +new file mode 100644 +index 0000000..21bf40b +--- /dev/null ++++ b/arch/i386/boot/compressed/LzmaDecode.c +@@ -0,0 +1,588 @@ ++/* ++ LzmaDecode.c ++ LZMA Decoder (optimized for Speed version) ++ ++ LZMA SDK 4.22 Copyright (c) 1999-2005 Igor Pavlov (2005-06-10) ++ http://www.7-zip.org/ ++ ++ LZMA SDK is licensed under two licenses: ++ 1) GNU Lesser General Public License (GNU LGPL) ++ 2) Common Public License (CPL) ++ It means that you can select one of these two licenses and ++ follow rules of that license. ++ ++ SPECIAL EXCEPTION: ++ Igor Pavlov, as the author of this Code, expressly permits you to ++ statically or dynamically link your Code (or bind by name) to the ++ interfaces of this file without subjecting your linked Code to the ++ terms of the CPL or GNU LGPL. Any modifications or additions ++ to this file, however, are subject to the LGPL or CPL terms. 
++*/ ++ ++#include "LzmaDecode.h" ++ ++#ifndef Byte ++#define Byte unsigned char ++#endif ++ ++#define kNumTopBits 24 ++#define kTopValue ((UInt32)1 << kNumTopBits) ++ ++#define kNumBitModelTotalBits 11 ++#define kBitModelTotal (1 << kNumBitModelTotalBits) ++#define kNumMoveBits 5 ++ ++#define RC_READ_BYTE (*Buffer++) ++ ++#define RC_INIT2 Code = 0; Range = 0xFFFFFFFF; \ ++ { int i; for(i = 0; i < 5; i++) { RC_TEST; Code = (Code << 8) | RC_READ_BYTE; }} ++ ++#ifdef _LZMA_IN_CB ++ ++#define RC_TEST { if (Buffer == BufferLim) \ ++ { SizeT size; int result = InCallback->Read(InCallback, &Buffer, &size); if (result != LZMA_RESULT_OK) return result; \ ++ BufferLim = Buffer + size; if (size == 0) return LZMA_RESULT_DATA_ERROR; }} ++ ++#define RC_INIT Buffer = BufferLim = 0; RC_INIT2 ++ ++#else ++ ++#define RC_TEST { if (Buffer == BufferLim) return LZMA_RESULT_DATA_ERROR; } ++ ++#define RC_INIT(buffer, bufferSize) Buffer = buffer; BufferLim = buffer + bufferSize; RC_INIT2 ++ ++#endif ++ ++#define RC_NORMALIZE if (Range < kTopValue) { RC_TEST; Range <<= 8; Code = (Code << 8) | RC_READ_BYTE; } ++ ++#define IfBit0(p) RC_NORMALIZE; bound = (Range >> kNumBitModelTotalBits) * *(p); if (Code < bound) ++#define UpdateBit0(p) Range = bound; *(p) += (kBitModelTotal - *(p)) >> kNumMoveBits; ++#define UpdateBit1(p) Range -= bound; Code -= bound; *(p) -= (*(p)) >> kNumMoveBits; ++ ++#define RC_GET_BIT2(p, mi, A0, A1) IfBit0(p) \ ++ { UpdateBit0(p); mi <<= 1; A0; } else \ ++ { UpdateBit1(p); mi = (mi + mi) + 1; A1; } ++ ++#define RC_GET_BIT(p, mi) RC_GET_BIT2(p, mi, ; , ;) ++ ++#define RangeDecoderBitTreeDecode(probs, numLevels, res) \ ++ { int i = numLevels; res = 1; \ ++ do { CProb *p = probs + res; RC_GET_BIT(p, res) } while(--i != 0); \ ++ res -= (1 << numLevels); } ++ ++ ++#define kNumPosBitsMax 4 ++#define kNumPosStatesMax (1 << kNumPosBitsMax) ++ ++#define kLenNumLowBits 3 ++#define kLenNumLowSymbols (1 << kLenNumLowBits) ++#define kLenNumMidBits 3 ++#define kLenNumMidSymbols (1 << kLenNumMidBits) ++#define kLenNumHighBits 8 ++#define kLenNumHighSymbols (1 << kLenNumHighBits) ++ ++#define LenChoice 0 ++#define LenChoice2 (LenChoice + 1) ++#define LenLow (LenChoice2 + 1) ++#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits)) ++#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits)) ++#define kNumLenProbs (LenHigh + kLenNumHighSymbols) ++ ++ ++#define kNumStates 12 ++#define kNumLitStates 7 ++ ++#define kStartPosModelIndex 4 ++#define kEndPosModelIndex 14 ++#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) ++ ++#define kNumPosSlotBits 6 ++#define kNumLenToPosStates 4 ++ ++#define kNumAlignBits 4 ++#define kAlignTableSize (1 << kNumAlignBits) ++ ++#define kMatchMinLen 2 ++ ++#define IsMatch 0 ++#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax)) ++#define IsRepG0 (IsRep + kNumStates) ++#define IsRepG1 (IsRepG0 + kNumStates) ++#define IsRepG2 (IsRepG1 + kNumStates) ++#define IsRep0Long (IsRepG2 + kNumStates) ++#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax)) ++#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) ++#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex) ++#define LenCoder (Align + kAlignTableSize) ++#define RepLenCoder (LenCoder + kNumLenProbs) ++#define Literal (RepLenCoder + kNumLenProbs) ++ ++#if Literal != LZMA_BASE_SIZE ++StopCompilingDueBUG ++#endif ++ ++int LzmaDecodeProperties(CLzmaProperties *propsRes, const unsigned char *propsData, int size) ++{ ++ unsigned char prop0; ++ if (size < LZMA_PROPERTIES_SIZE) ++ return 
LZMA_RESULT_DATA_ERROR; ++ prop0 = propsData[0]; ++ if (prop0 >= (9 * 5 * 5)) ++ return LZMA_RESULT_DATA_ERROR; ++ { ++ for (propsRes->pb = 0; prop0 >= (9 * 5); propsRes->pb++, prop0 -= (9 * 5)); ++ for (propsRes->lp = 0; prop0 >= 9; propsRes->lp++, prop0 -= 9); ++ propsRes->lc = prop0; ++ /* ++ unsigned char remainder = (unsigned char)(prop0 / 9); ++ propsRes->lc = prop0 % 9; ++ propsRes->pb = remainder / 5; ++ propsRes->lp = remainder % 5; ++ */ ++ } ++ ++ #ifdef _LZMA_OUT_READ ++ { ++ int i; ++ propsRes->DictionarySize = 0; ++ for (i = 0; i < 4; i++) ++ propsRes->DictionarySize += (UInt32)(propsData[1 + i]) << (i * 8); ++ if (propsRes->DictionarySize == 0) ++ propsRes->DictionarySize = 1; ++ } ++ #endif ++ return LZMA_RESULT_OK; ++} ++ ++#define kLzmaStreamWasFinishedId (-1) ++ ++int LzmaDecode(CLzmaDecoderState *vs, ++ #ifdef _LZMA_IN_CB ++ ILzmaInCallback *InCallback, ++ #else ++ const unsigned char *inStream, SizeT inSize, SizeT *inSizeProcessed, ++ #endif ++ unsigned char *outStream, SizeT outSize, SizeT *outSizeProcessed) ++{ ++ CProb *p = vs->Probs; ++ SizeT nowPos = 0; ++ Byte previousByte = 0; ++ UInt32 posStateMask = (1 << (vs->Properties.pb)) - 1; ++ UInt32 literalPosMask = (1 << (vs->Properties.lp)) - 1; ++ int lc = vs->Properties.lc; ++ ++ #ifdef _LZMA_OUT_READ ++ ++ UInt32 Range = vs->Range; ++ UInt32 Code = vs->Code; ++ #ifdef _LZMA_IN_CB ++ const Byte *Buffer = vs->Buffer; ++ const Byte *BufferLim = vs->BufferLim; ++ #else ++ const Byte *Buffer = inStream; ++ const Byte *BufferLim = inStream + inSize; ++ #endif ++ int state = vs->State; ++ UInt32 rep0 = vs->Reps[0], rep1 = vs->Reps[1], rep2 = vs->Reps[2], rep3 = vs->Reps[3]; ++ int len = vs->RemainLen; ++ UInt32 globalPos = vs->GlobalPos; ++ UInt32 distanceLimit = vs->DistanceLimit; ++ ++ Byte *dictionary = vs->Dictionary; ++ UInt32 dictionarySize = vs->Properties.DictionarySize; ++ UInt32 dictionaryPos = vs->DictionaryPos; ++ ++ Byte tempDictionary[4]; ++ ++ #ifndef _LZMA_IN_CB ++ *inSizeProcessed = 0; ++ #endif ++ *outSizeProcessed = 0; ++ if (len == kLzmaStreamWasFinishedId) ++ return LZMA_RESULT_OK; ++ ++ if (dictionarySize == 0) ++ { ++ dictionary = tempDictionary; ++ dictionarySize = 1; ++ tempDictionary[0] = vs->TempDictionary[0]; ++ } ++ ++ if (len == kLzmaNeedInitId) ++ { ++ { ++ UInt32 numProbs = Literal + ((UInt32)LZMA_LIT_SIZE << (lc + vs->Properties.lp)); ++ UInt32 i; ++ for (i = 0; i < numProbs; i++) ++ p[i] = kBitModelTotal >> 1; ++ rep0 = rep1 = rep2 = rep3 = 1; ++ state = 0; ++ globalPos = 0; ++ distanceLimit = 0; ++ dictionaryPos = 0; ++ dictionary[dictionarySize - 1] = 0; ++ #ifdef _LZMA_IN_CB ++ RC_INIT; ++ #else ++ RC_INIT(inStream, inSize); ++ #endif ++ } ++ len = 0; ++ } ++ while(len != 0 && nowPos < outSize) ++ { ++ UInt32 pos = dictionaryPos - rep0; ++ if (pos >= dictionarySize) ++ pos += dictionarySize; ++ outStream[nowPos++] = dictionary[dictionaryPos] = dictionary[pos]; ++ if (++dictionaryPos == dictionarySize) ++ dictionaryPos = 0; ++ len--; ++ } ++ if (dictionaryPos == 0) ++ previousByte = dictionary[dictionarySize - 1]; ++ else ++ previousByte = dictionary[dictionaryPos - 1]; ++ ++ #else /* if !_LZMA_OUT_READ */ ++ ++ int state = 0; ++ UInt32 rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1; ++ int len = 0; ++ const Byte *Buffer; ++ const Byte *BufferLim; ++ UInt32 Range; ++ UInt32 Code; ++ ++ #ifndef _LZMA_IN_CB ++ *inSizeProcessed = 0; ++ #endif ++ *outSizeProcessed = 0; ++ ++ { ++ UInt32 i; ++ UInt32 numProbs = Literal + ((UInt32)LZMA_LIT_SIZE << (lc + vs->Properties.lp)); ++ for (i = 0; i < 
numProbs; i++) ++ p[i] = kBitModelTotal >> 1; ++ } ++ ++ #ifdef _LZMA_IN_CB ++ RC_INIT; ++ #else ++ RC_INIT(inStream, inSize); ++ #endif ++ ++ #endif /* _LZMA_OUT_READ */ ++ ++ while(nowPos < outSize) ++ { ++ CProb *prob; ++ UInt32 bound; ++ int posState = (int)( ++ (nowPos ++ #ifdef _LZMA_OUT_READ ++ + globalPos ++ #endif ++ ) ++ & posStateMask); ++ ++ prob = p + IsMatch + (state << kNumPosBitsMax) + posState; ++ IfBit0(prob) ++ { ++ int symbol = 1; ++ UpdateBit0(prob) ++ prob = p + Literal + (LZMA_LIT_SIZE * ++ ((( ++ (nowPos ++ #ifdef _LZMA_OUT_READ ++ + globalPos ++ #endif ++ ) ++ & literalPosMask) << lc) + (previousByte >> (8 - lc)))); ++ ++ if (state >= kNumLitStates) ++ { ++ int matchByte; ++ #ifdef _LZMA_OUT_READ ++ UInt32 pos = dictionaryPos - rep0; ++ if (pos >= dictionarySize) ++ pos += dictionarySize; ++ matchByte = dictionary[pos]; ++ #else ++ matchByte = outStream[nowPos - rep0]; ++ #endif ++ do ++ { ++ int bit; ++ CProb *probLit; ++ matchByte <<= 1; ++ bit = (matchByte & 0x100); ++ probLit = prob + 0x100 + bit + symbol; ++ RC_GET_BIT2(probLit, symbol, if (bit != 0) break, if (bit == 0) break) ++ } ++ while (symbol < 0x100); ++ } ++ while (symbol < 0x100) ++ { ++ CProb *probLit = prob + symbol; ++ RC_GET_BIT(probLit, symbol) ++ } ++ previousByte = (Byte)symbol; ++ ++ outStream[nowPos++] = previousByte; ++ #ifdef _LZMA_OUT_READ ++ if (distanceLimit < dictionarySize) ++ distanceLimit++; ++ ++ dictionary[dictionaryPos] = previousByte; ++ if (++dictionaryPos == dictionarySize) ++ dictionaryPos = 0; ++ #endif ++ if (state < 4) state = 0; ++ else if (state < 10) state -= 3; ++ else state -= 6; ++ } ++ else ++ { ++ UpdateBit1(prob); ++ prob = p + IsRep + state; ++ IfBit0(prob) ++ { ++ UpdateBit0(prob); ++ rep3 = rep2; ++ rep2 = rep1; ++ rep1 = rep0; ++ state = state < kNumLitStates ? 0 : 3; ++ prob = p + LenCoder; ++ } ++ else ++ { ++ UpdateBit1(prob); ++ prob = p + IsRepG0 + state; ++ IfBit0(prob) ++ { ++ UpdateBit0(prob); ++ prob = p + IsRep0Long + (state << kNumPosBitsMax) + posState; ++ IfBit0(prob) ++ { ++ #ifdef _LZMA_OUT_READ ++ UInt32 pos; ++ #endif ++ UpdateBit0(prob); ++ ++ #ifdef _LZMA_OUT_READ ++ if (distanceLimit == 0) ++ #else ++ if (nowPos == 0) ++ #endif ++ return LZMA_RESULT_DATA_ERROR; ++ ++ state = state < kNumLitStates ? 9 : 11; ++ #ifdef _LZMA_OUT_READ ++ pos = dictionaryPos - rep0; ++ if (pos >= dictionarySize) ++ pos += dictionarySize; ++ previousByte = dictionary[pos]; ++ dictionary[dictionaryPos] = previousByte; ++ if (++dictionaryPos == dictionarySize) ++ dictionaryPos = 0; ++ #else ++ previousByte = outStream[nowPos - rep0]; ++ #endif ++ outStream[nowPos++] = previousByte; ++ #ifdef _LZMA_OUT_READ ++ if (distanceLimit < dictionarySize) ++ distanceLimit++; ++ #endif ++ ++ continue; ++ } ++ else ++ { ++ UpdateBit1(prob); ++ } ++ } ++ else ++ { ++ UInt32 distance; ++ UpdateBit1(prob); ++ prob = p + IsRepG1 + state; ++ IfBit0(prob) ++ { ++ UpdateBit0(prob); ++ distance = rep1; ++ } ++ else ++ { ++ UpdateBit1(prob); ++ prob = p + IsRepG2 + state; ++ IfBit0(prob) ++ { ++ UpdateBit0(prob); ++ distance = rep2; ++ } ++ else ++ { ++ UpdateBit1(prob); ++ distance = rep3; ++ rep3 = rep2; ++ } ++ rep2 = rep1; ++ } ++ rep1 = rep0; ++ rep0 = distance; ++ } ++ state = state < kNumLitStates ? 
8 : 11; ++ prob = p + RepLenCoder; ++ } ++ { ++ int numBits, offset; ++ CProb *probLen = prob + LenChoice; ++ IfBit0(probLen) ++ { ++ UpdateBit0(probLen); ++ probLen = prob + LenLow + (posState << kLenNumLowBits); ++ offset = 0; ++ numBits = kLenNumLowBits; ++ } ++ else ++ { ++ UpdateBit1(probLen); ++ probLen = prob + LenChoice2; ++ IfBit0(probLen) ++ { ++ UpdateBit0(probLen); ++ probLen = prob + LenMid + (posState << kLenNumMidBits); ++ offset = kLenNumLowSymbols; ++ numBits = kLenNumMidBits; ++ } ++ else ++ { ++ UpdateBit1(probLen); ++ probLen = prob + LenHigh; ++ offset = kLenNumLowSymbols + kLenNumMidSymbols; ++ numBits = kLenNumHighBits; ++ } ++ } ++ RangeDecoderBitTreeDecode(probLen, numBits, len); ++ len += offset; ++ } ++ ++ if (state < 4) ++ { ++ int posSlot; ++ state += kNumLitStates; ++ prob = p + PosSlot + ++ ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << ++ kNumPosSlotBits); ++ RangeDecoderBitTreeDecode(prob, kNumPosSlotBits, posSlot); ++ if (posSlot >= kStartPosModelIndex) ++ { ++ int numDirectBits = ((posSlot >> 1) - 1); ++ rep0 = (2 | ((UInt32)posSlot & 1)); ++ if (posSlot < kEndPosModelIndex) ++ { ++ rep0 <<= numDirectBits; ++ prob = p + SpecPos + rep0 - posSlot - 1; ++ } ++ else ++ { ++ numDirectBits -= kNumAlignBits; ++ do ++ { ++ RC_NORMALIZE ++ Range >>= 1; ++ rep0 <<= 1; ++ if (Code >= Range) ++ { ++ Code -= Range; ++ rep0 |= 1; ++ } ++ } ++ while (--numDirectBits != 0); ++ prob = p + Align; ++ rep0 <<= kNumAlignBits; ++ numDirectBits = kNumAlignBits; ++ } ++ { ++ int i = 1; ++ int mi = 1; ++ do ++ { ++ CProb *prob3 = prob + mi; ++ RC_GET_BIT2(prob3, mi, ; , rep0 |= i); ++ i <<= 1; ++ } ++ while(--numDirectBits != 0); ++ } ++ } ++ else ++ rep0 = posSlot; ++ if (++rep0 == (UInt32)(0)) ++ { ++ /* it's for stream version */ ++ len = kLzmaStreamWasFinishedId; ++ break; ++ } ++ } ++ ++ len += kMatchMinLen; ++ #ifdef _LZMA_OUT_READ ++ if (rep0 > distanceLimit) ++ #else ++ if (rep0 > nowPos) ++ #endif ++ return LZMA_RESULT_DATA_ERROR; ++ ++ #ifdef _LZMA_OUT_READ ++ if (dictionarySize - distanceLimit > (UInt32)len) ++ distanceLimit += len; ++ else ++ distanceLimit = dictionarySize; ++ #endif ++ ++ do ++ { ++ #ifdef _LZMA_OUT_READ ++ UInt32 pos = dictionaryPos - rep0; ++ if (pos >= dictionarySize) ++ pos += dictionarySize; ++ previousByte = dictionary[pos]; ++ dictionary[dictionaryPos] = previousByte; ++ if (++dictionaryPos == dictionarySize) ++ dictionaryPos = 0; ++ #else ++ previousByte = outStream[nowPos - rep0]; ++ #endif ++ len--; ++ outStream[nowPos++] = previousByte; ++ } ++ while(len != 0 && nowPos < outSize); ++ } ++ } ++ RC_NORMALIZE; ++ ++ #ifdef _LZMA_OUT_READ ++ vs->Range = Range; ++ vs->Code = Code; ++ vs->DictionaryPos = dictionaryPos; ++ vs->GlobalPos = globalPos + (UInt32)nowPos; ++ vs->DistanceLimit = distanceLimit; ++ vs->Reps[0] = rep0; ++ vs->Reps[1] = rep1; ++ vs->Reps[2] = rep2; ++ vs->Reps[3] = rep3; ++ vs->State = state; ++ vs->RemainLen = len; ++ vs->TempDictionary[0] = tempDictionary[0]; ++ #endif ++ ++ #ifdef _LZMA_IN_CB ++ vs->Buffer = Buffer; ++ vs->BufferLim = BufferLim; ++ #else ++ *inSizeProcessed = (SizeT)(Buffer - inStream); ++ #endif ++ *outSizeProcessed = nowPos; ++ return LZMA_RESULT_OK; ++} +diff --git a/arch/i386/boot/compressed/LzmaDecode.h b/arch/i386/boot/compressed/LzmaDecode.h +new file mode 100644 +index 0000000..213062a +--- /dev/null ++++ b/arch/i386/boot/compressed/LzmaDecode.h +@@ -0,0 +1,131 @@ ++/* ++ LzmaDecode.h ++ LZMA Decoder interface ++ ++ LZMA SDK 4.21 Copyright (c) 1999-2005 Igor Pavlov (2005-06-08) ++ 
http://www.7-zip.org/ ++ ++ LZMA SDK is licensed under two licenses: ++ 1) GNU Lesser General Public License (GNU LGPL) ++ 2) Common Public License (CPL) ++ It means that you can select one of these two licenses and ++ follow rules of that license. ++ ++ SPECIAL EXCEPTION: ++ Igor Pavlov, as the author of this code, expressly permits you to ++ statically or dynamically link your code (or bind by name) to the ++ interfaces of this file without subjecting your linked code to the ++ terms of the CPL or GNU LGPL. Any modifications or additions ++ to this file, however, are subject to the LGPL or CPL terms. ++*/ ++ ++#ifndef __LZMADECODE_H ++#define __LZMADECODE_H ++ ++/* #define _LZMA_IN_CB */ ++/* Use callback for input data */ ++ ++/* #define _LZMA_OUT_READ */ ++/* Use read function for output data */ ++ ++/* #define _LZMA_PROB32 */ ++/* It can increase speed on some 32-bit CPUs, ++ but memory usage will be doubled in that case */ ++ ++/* #define _LZMA_LOC_OPT */ ++/* Enable local speed optimizations inside code */ ++ ++/* #define _LZMA_SYSTEM_SIZE_T */ ++/* Use system's size_t. You can use it to enable 64-bit sizes supporting*/ ++ ++#ifndef UInt32 ++#ifdef _LZMA_UINT32_IS_ULONG ++#define UInt32 unsigned long ++#else ++#define UInt32 unsigned int ++#endif ++#endif ++ ++#ifndef SizeT ++#ifdef _LZMA_SYSTEM_SIZE_T ++#include <stddef.h> ++#define SizeT size_t ++#else ++#define SizeT UInt32 ++#endif ++#endif ++ ++#ifdef _LZMA_PROB32 ++#define CProb UInt32 ++#else ++#define CProb unsigned short ++#endif ++ ++#define LZMA_RESULT_OK 0 ++#define LZMA_RESULT_DATA_ERROR 1 ++ ++#ifdef _LZMA_IN_CB ++typedef struct _ILzmaInCallback ++{ ++ int (*Read)(void *object, const unsigned char **buffer, SizeT *bufferSize); ++} ILzmaInCallback; ++#endif ++ ++#define LZMA_BASE_SIZE 1846 ++#define LZMA_LIT_SIZE 768 ++ ++#define LZMA_PROPERTIES_SIZE 5 ++ ++typedef struct _CLzmaProperties ++{ ++ int lc; ++ int lp; ++ int pb; ++ #ifdef _LZMA_OUT_READ ++ UInt32 DictionarySize; ++ #endif ++}CLzmaProperties; ++ ++int LzmaDecodeProperties(CLzmaProperties *propsRes, const unsigned char *propsData, int size); ++ ++#define LzmaGetNumProbs(Properties) (LZMA_BASE_SIZE + (LZMA_LIT_SIZE << ((Properties)->lc + (Properties)->lp))) ++ ++#define kLzmaNeedInitId (-2) ++ ++typedef struct _CLzmaDecoderState ++{ ++ CLzmaProperties Properties; ++ CProb *Probs; ++ ++ #ifdef _LZMA_IN_CB ++ const unsigned char *Buffer; ++ const unsigned char *BufferLim; ++ #endif ++ ++ #ifdef _LZMA_OUT_READ ++ unsigned char *Dictionary; ++ UInt32 Range; ++ UInt32 Code; ++ UInt32 DictionaryPos; ++ UInt32 GlobalPos; ++ UInt32 DistanceLimit; ++ UInt32 Reps[4]; ++ int State; ++ int RemainLen; ++ unsigned char TempDictionary[4]; ++ #endif ++} CLzmaDecoderState; ++ ++#ifdef _LZMA_OUT_READ ++#define LzmaDecoderInit(vs) { (vs)->RemainLen = kLzmaNeedInitId; } ++#endif ++ ++int LzmaDecode(CLzmaDecoderState *vs, ++ #ifdef _LZMA_IN_CB ++ ILzmaInCallback *inCallback, ++ #else ++ const unsigned char *inStream, SizeT inSize, SizeT *inSizeProcessed, ++ #endif ++ unsigned char *outStream, SizeT outSize, SizeT *outSizeProcessed); ++ ++#endif +diff --git a/arch/i386/boot/compressed/Makefile b/arch/i386/boot/compressed/Makefile +index a661217..fb40869 100644 +--- a/arch/i386/boot/compressed/Makefile ++++ b/arch/i386/boot/compressed/Makefile +@@ -4,15 +4,16 @@ + # create a compressed vmlinux image from the original vmlinux + # + +-targets := vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o \ +- vmlinux.bin.all vmlinux.relocs ++tragets := head.o lzma_misc.o piggy.o \ ++ 
vmlinux.bin.all vmlinux.relocs \ ++ vmlinux vmlinux.bin vmlinux.bin.gz + EXTRA_AFLAGS := -traditional + + LDFLAGS_vmlinux := -T +-CFLAGS_misc.o += -fPIC ++CFLAGS_lzma_misc.o += -fPIC + hostprogs-y := relocs + +-$(obj)/vmlinux: $(src)/vmlinux.lds $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE ++$(obj)/vmlinux: $(src)/vmlinux.lds $(obj)/head.o $(obj)/lzma_misc.o $(obj)/piggy.o FORCE + $(call if_changed,ld) + @: + +@@ -33,10 +34,10 @@ $(obj)/vmlinux.bin.all: $(vmlinux.bin.all-y) FORCE + + ifdef CONFIG_RELOCATABLE + $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin.all FORCE +- $(call if_changed,gzip) ++ $(call if_changed,lzma) + else + $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE +- $(call if_changed,gzip) ++ $(call if_changed,lzma) + endif + + LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T +diff --git a/arch/i386/boot/compressed/lzma_misc.c b/arch/i386/boot/compressed/lzma_misc.c +new file mode 100644 +index 0000000..4f5f7f9 +--- /dev/null ++++ b/arch/i386/boot/compressed/lzma_misc.c +@@ -0,0 +1,290 @@ ++/* ++ * lzma_misc.c ++ * ++ * Decompress LZMA compressed vmlinuz ++ * Version 0.9 Copyright (c) Ming-Ching Tiew mctiew@yahoo.com ++ * Program adapted from misc.c for 2.6.20.1 kernel ++ * Please refer to misc.c for authorship and copyright. ++ * Date: 25 March 2007 ++ * Source released under GPL ++ */ ++ ++#undef CONFIG_PARAVIRT ++#include <linux/linkage.h> ++#include <linux/vmalloc.h> ++#include <linux/screen_info.h> ++#include <asm/io.h> ++#include <asm/page.h> ++#include <asm/boot.h> ++ ++/* WARNING!! ++ * This code is compiled with -fPIC and it is relocated dynamically ++ * at run time, but no relocation processing is performed. ++ * This means that it is not safe to place pointers in static structures. ++ */ ++ ++#define OF(args) args ++#define STATIC static ++ ++#undef memset ++#undef memcpy ++ ++typedef unsigned char uch; ++typedef unsigned short ush; ++typedef unsigned long ulg; ++ ++#define WSIZE 0x80000000 /* Window size must be at least 32k, ++ * and a power of two ++ * We don't actually have a window just ++ * a huge output buffer so I report ++ * a 2G windows size, as that should ++ * always be larger than our output buffer. ++ */ ++ ++static uch *inbuf; /* input buffer */ ++static uch *window; /* Sliding window buffer, (and final output buffer) */ ++ ++static unsigned insize; /* valid bytes in inbuf */ ++static unsigned inptr; /* index of next byte to be processed in inbuf */ ++ ++/* gzip flag byte */ ++#define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */ ++#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gzip file */ ++#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */ ++#define ORIG_NAME 0x08 /* bit 3 set: original file name present */ ++#define COMMENT 0x10 /* bit 4 set: file comment present */ ++#define ENCRYPTED 0x20 /* bit 5 set: file is encrypted */ ++#define RESERVED 0xC0 /* bit 6,7: reserved */ ++ ++#define get_byte() (inptr < insize ? 
inbuf[inptr++] : fill_inbuf()) ++ ++/* Diagnostic functions */ ++#ifdef DEBUG ++# define Assert(cond,msg) {if(!(cond)) error(msg);} ++# define Trace(x) fprintf x ++# define Tracev(x) {if (verbose) fprintf x ;} ++# define Tracevv(x) {if (verbose>1) fprintf x ;} ++# define Tracec(c,x) {if (verbose && (c)) fprintf x ;} ++# define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;} ++#else ++# define Assert(cond,msg) ++# define Trace(x) ++# define Tracev(x) ++# define Tracevv(x) ++# define Tracec(c,x) ++# define Tracecv(c,x) ++#endif ++ ++static int fill_inbuf(void); ++static void error(char *m); ++ ++/* ++ * This is set up by the setup-routine at boot-time ++ */ ++static unsigned char *real_mode; /* Pointer to real-mode data */ ++ ++#define RM_EXT_MEM_K (*(unsigned short *)(real_mode + 0x2)) ++#ifndef STANDARD_MEMORY_BIOS_CALL ++#define RM_ALT_MEM_K (*(unsigned long *)(real_mode + 0x1e0)) ++#endif ++#define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0)) ++ ++extern unsigned char input_data[]; ++extern int input_len; ++ ++static long bytes_out = 0; ++ ++static void *memcpy(void *dest, const void *src, unsigned n); ++ ++static void putstr(const char *); ++ ++static unsigned long free_mem_ptr; ++static unsigned long free_mem_end_ptr; ++ ++#define HEAP_SIZE 0x3000 ++ ++static char *vidmem = (char *)0xb8000; ++static int vidport; ++static int lines, cols; ++ ++#ifdef CONFIG_X86_NUMAQ ++void *xquad_portio; ++#endif ++ ++static void scroll(void) ++{ ++ int i; ++ ++ memcpy ( vidmem, vidmem + cols * 2, ( lines - 1 ) * cols * 2 ); ++ for ( i = ( lines - 1 ) * cols * 2; i < lines * cols * 2; i += 2 ) ++ vidmem[i] = ' '; ++} ++ ++static void putstr(const char *s) ++{ ++ int x,y,pos; ++ char c; ++ ++ x = RM_SCREEN_INFO.orig_x; ++ y = RM_SCREEN_INFO.orig_y; ++ ++ while ( ( c = *s++ ) != '\0' ) { ++ if ( c == '\n' ) { ++ x = 0; ++ if ( ++y >= lines ) { ++ scroll(); ++ y--; ++ } ++ } else { ++ vidmem [ ( x + cols * y ) * 2 ] = c; ++ if ( ++x >= cols ) { ++ x = 0; ++ if ( ++y >= lines ) { ++ scroll(); ++ y--; ++ } ++ } ++ } ++ } ++ ++ RM_SCREEN_INFO.orig_x = x; ++ RM_SCREEN_INFO.orig_y = y; ++ ++ pos = (x + cols * y) * 2; /* Update cursor position */ ++ outb_p(14, vidport); ++ outb_p(0xff & (pos >> 9), vidport+1); ++ outb_p(15, vidport); ++ outb_p(0xff & (pos >> 1), vidport+1); ++} ++ ++static void* memcpy(void* dest, const void* src, unsigned n) ++{ ++ int i; ++ char *d = (char *)dest, *s = (char *)src; ++ ++ for (i=0;i<n;i++) d[i] = s[i]; ++ return dest; ++} ++ ++/* =========================================================================== ++ * Fill the input buffer. This is called only when the buffer is empty ++ * and at least one byte is really needed. 
++ */ ++static int fill_inbuf(void) ++{ ++ error("ran out of input data"); ++ return 0; ++} ++ ++/* =========================================================================== ++ */ ++static void error(char *x) ++{ ++ putstr("\n\n"); ++ putstr(x); ++ putstr("\n\n -- System halted"); ++ ++ while(1); /* Halt */ ++} ++ ++#define _LZMA_IN_CB ++#include "LzmaDecode.h" ++#include "LzmaDecode.c" ++ ++static int read_byte(void *object, const unsigned char **buffer, SizeT *bufferSize); ++ ++/* ++ * Do the lzma decompression ++ */ ++static int lzma_unzip(uch* output) ++{ ++ ++ unsigned int i; ++ CLzmaDecoderState state; ++ unsigned int uncompressedSize = 0; ++ unsigned char* p; ++ ++ ILzmaInCallback callback; ++ callback.Read = read_byte; ++ ++ // lzma args ++ i = get_byte(); ++ state.Properties.lc = i % 9, i = i / 9; ++ state.Properties.lp = i % 5, state.Properties.pb = i / 5; ++ ++ // skip dictionary size ++ for (i = 0; i < 4; i++) ++ get_byte(); ++ // get uncompressed size ++ p= (char*)&uncompressedSize; ++ for (i = 0; i < 4; i++) ++ *p++ = get_byte(); ++ ++ // skip high order bytes ++ for (i = 0; i < 4; i++) ++ get_byte(); ++ ++ // Just point it beyond ++ state.Probs = (CProb*) ( free_mem_ptr ); ++ // decompress kernel ++ if (LzmaDecode( &state, &callback, ++ (unsigned char*)output, uncompressedSize, &i) == LZMA_RESULT_OK) ++ { ++ if ( i != uncompressedSize ) ++ error( "kernel corrupted!\n"); ++ bytes_out = i; ++ return 0; ++ } ++ return 1; ++} ++ ++ ++static int read_byte(void *object, const unsigned char **buffer, SizeT *bufferSize) ++{ ++ static unsigned int i = 0; ++ static unsigned char val; ++ *bufferSize = 1; ++ val = get_byte(); ++ *buffer = &val; ++ return LZMA_RESULT_OK; ++} ++ ++asmlinkage void decompress_kernel(void *rmode, unsigned long end, ++ uch *input_data, unsigned long input_len, uch *output) ++{ ++ real_mode = rmode; ++ ++ if (RM_SCREEN_INFO.orig_video_mode == 7) { ++ vidmem = (char *) 0xb0000; ++ vidport = 0x3b4; ++ } else { ++ vidmem = (char *) 0xb8000; ++ vidport = 0x3d4; ++ } ++ ++ lines = RM_SCREEN_INFO.orig_video_lines; ++ cols = RM_SCREEN_INFO.orig_video_cols; ++ ++ window = output; /* Output buffer (Normally at 1M) */ ++ free_mem_ptr = end; /* Heap */ ++ free_mem_end_ptr = end + HEAP_SIZE; ++ inbuf = input_data; /* Input buffer */ ++ insize = input_len; ++ inptr = 0; ++ ++ if ((u32)output & (CONFIG_PHYSICAL_ALIGN -1)) ++ error("Destination address not CONFIG_PHYSICAL_ALIGN aligned"); ++ if (end > ((-__PAGE_OFFSET-(512 <<20)-1) & 0x7fffffff)) ++ error("Destination address too large"); ++#ifndef CONFIG_RELOCATABLE ++ if ((u32)output != LOAD_PHYSICAL_ADDR) ++ error("Wrong destination address"); ++#endif ++ if( lzma_unzip(output) != 0 ) ++ { ++ error("inflate error\n"); ++ } ++ putstr("Ok, booting the kernel.\n"); ++ ++ return; ++} +diff --git a/arch/i386/boot/compressed/vmlinux.scr b/arch/i386/boot/compressed/vmlinux.scr +index 707a88f..9d67263 100644 +--- a/arch/i386/boot/compressed/vmlinux.scr ++++ b/arch/i386/boot/compressed/vmlinux.scr +@@ -3,8 +3,8 @@ SECTIONS + .data.compressed : { + input_len = .; + LONG(input_data_end - input_data) input_data = .; ++ output_len = . + 5; + *(.data) +- output_len = . - 4; + input_data_end = .; + } + } +diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig +index 17ee97f..64b7bda 100644 +--- a/drivers/block/Kconfig ++++ b/drivers/block/Kconfig +@@ -406,6 +406,47 @@ config BLK_DEV_RAM_BLOCKSIZE + setups function - apparently needed by the rd_load_image routine + that supposes the filesystem in the image uses a 1024 blocksize. 
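The lzma_unzip() decompressor added above, and the `lzma e ...` commands suggested in the LZMA_INITRD help text just below, both rely on the fixed 13-byte header that the LZMA SDK's `lzma e` tool writes in front of the compressed data: one properties byte encoding lc/lp/pb as (pb*5 + lp)*9 + lc, a 4-byte little-endian dictionary size, and an 8-byte little-endian uncompressed size (lzma_unzip() keeps only the low 32 bits of it and skips the rest). The vmlinux.scr hunk above appears to lean on the same layout: output_len is now defined as input_data + 5, i.e. it points straight at that embedded uncompressed-size field instead of at gzip's trailing ISIZE word. The sketch below is illustrative only — it is not part of the patch, the names are made up — and simply replays the same header decode in plain, standalone C:

/* Illustrative sketch, not part of the patch: decode the 13-byte header
 * at the start of an LZMA "alone" stream the same way lzma_unzip() does. */
#include <stdio.h>

struct lzma_hdr {
	int lc, lp, pb;			/* literal-context, literal-pos, pos bits */
	unsigned int dict_size;		/* little-endian in the stream */
	unsigned long long uncompressed;/* little-endian in the stream */
};

static int parse_lzma_header(const unsigned char *h, struct lzma_hdr *out)
{
	unsigned int b = h[0], i;

	if (b >= 9 * 5 * 5)		/* same validity check as LzmaDecodeProperties() */
		return -1;
	out->lc = b % 9;  b /= 9;
	out->lp = b % 5;  out->pb = b / 5;

	out->dict_size = 0;
	for (i = 0; i < 4; i++)
		out->dict_size |= (unsigned int)h[1 + i] << (8 * i);

	out->uncompressed = 0;
	for (i = 0; i < 8; i++)
		out->uncompressed |= (unsigned long long)h[5 + i] << (8 * i);
	return 0;
}

int main(void)
{
	/* example header: default properties, 1 MiB dictionary, 42-byte payload */
	const unsigned char hdr[13] = { 0x5d, 0, 0, 0x10, 0, 42, 0, 0, 0, 0, 0, 0, 0 };
	struct lzma_hdr p;

	if (parse_lzma_header(hdr, &p) == 0)
		printf("lc=%d lp=%d pb=%d dict=%u size=%llu\n",
		       p.lc, p.lp, p.pb, p.dict_size, p.uncompressed);
	return 0;
}

With the encoder's default settings (-lc3 -lp0 -pb2) the properties byte works out to (2*5 + 0)*9 + 3 = 93 = 0x5d, a handy sanity check that piggy.o really starts with an LZMA stream; the -d16 option in the suggested initrd command line should only change the four dictionary-size bytes (2^16 = 64 KiB), not that first byte.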
+ ++config LZMA_INITRD ++ boolean "Allow LZMA compression on initrd" ++ depends on BLK_DEV_INITRD=y ++ default "y" ++ help ++ Use lzma compression on initrd, example 'lzma e initrd initrd.7z -d16'. ++ If you have sufficient memory, you could compress using bigger dictionary size, ++ 'lzma e initrd initrd.7z'. ++ ++config LZMA_INITRD_KMALLOC_ONLY ++ boolean "Use only kmalloc, do not use vmalloc on lzma initrd" ++ depends on LZMA_INITRD=y ++ default "n" ++ help ++ Set to y if you do not want to use vmalloc, ie use only kmalloc. ++ ++config LZMA_INITRAM_FS ++ boolean "Allow LZMA compression on initramfs" ++ depends on BLK_DEV_RAM=y ++ default "y" ++ help ++ Use lzma compression on initramfs, example 'lzma e initramfs.cpio initramfs.cpio.lzma'. ++ ++config LZMA_INITRAM_FS_SMALLMEM ++ boolean "Use lzma compression with small dictonary size." ++ depends on LZMA_INITRAM_FS=y ++ default "y" ++ help ++ Use lzma compression on initramfs with small dictionary size, example ++ 'lzma e initramfs.cpio initramfs.cpio.lzma -d16'. ++ Affects only the initramfs.cpio in the ~usr directory, which is compiled into ++ the kernel. If you prepared initramfs.cpio for use with bootloader, you would ++ need to specify the commandline options (-d16) yourself. ++ ++config LZMA_INITRAM_FS_KMALLOC_ONLY ++ boolean "Use only kmalloc, do not use vmalloc on lzma initramfs" ++ depends on LZMA_INITRAM_FS=y ++ default "n" ++ help ++ Set to y if you do not want to use vmalloc, ie use only kmalloc. ++ + config CDROM_PKTCDVD + tristate "Packet writing on CD/DVD media" + depends on !UML +diff --git a/fs/Kconfig b/fs/Kconfig +index 3c4886b..bdcc6fb 100644 +--- a/fs/Kconfig ++++ b/fs/Kconfig +@@ -1371,6 +1371,71 @@ config CRAMFS + + If unsure, say N. + ++config SQUASHFS ++ tristate "SquashFS 3.2 - Squashed file system support" ++ select ZLIB_INFLATE ++ help ++ Saying Y here includes support for SquashFS 3.2 (a Compressed Read-Only File ++ System). Squashfs is a highly compressed read-only filesystem for Linux. ++ It uses zlib compression to compress both files, inodes and directories. ++ Inodes in the system are very small and all blocks are packed to minimise ++ data overhead. Block sizes greater than 4K are supported up to a maximum of 64K. ++ SquashFS 3.1 supports 64 bit filesystems and files (larger than 4GB), full ++ uid/gid information, hard links and timestamps. ++ ++ Squashfs is intended for general read-only filesystem use, for archival ++ use (i.e. in cases where a .tar.gz file may be used), and in embedded ++ systems where low overhead is needed. Further information and filesystem tools ++ are available from http://squashfs.sourceforge.net. ++ ++ If you want to compile this as a module ( = code which can be ++ inserted in and removed from the running kernel whenever you want), ++ say M here and read <file:Documentation/modules.txt>. The module ++ will be called squashfs. Note that the root file system (the one ++ containing the directory /) cannot be compiled as a module. ++ ++ If unsure, say N. ++ ++config SQUASHFS_EMBEDDED ++ ++ bool "Additional options for memory-constrained systems" ++ depends on SQUASHFS ++ default n ++ help ++ Saying Y here allows you to specify cache sizes and how Squashfs ++ allocates memory. This is only intended for memory constrained ++ systems. ++ ++ If unsure, say N. ++ ++config SQUASHFS_FRAGMENT_CACHE_SIZE ++ int "Number of fragments cached" if SQUASHFS_EMBEDDED ++ depends on SQUASHFS ++ default "3" ++ help ++ By default SquashFS caches the last 3 fragments read from ++ the filesystem. 
Increasing this amount may mean SquashFS ++ has to re-read fragments less often from disk, at the expense ++ of extra system memory. Decreasing this amount will mean ++ SquashFS uses less memory at the expense of extra reads from disk. ++ ++ Note there must be at least one cached fragment. Anything ++ much more than three will probably not make much difference. ++ ++config SQUASHFS_VMALLOC ++ bool "Use Vmalloc rather than Kmalloc" if SQUASHFS_EMBEDDED ++ depends on SQUASHFS ++ default n ++ help ++ By default SquashFS uses kmalloc to obtain fragment cache memory. ++ Kmalloc memory is the standard kernel allocator, but it can fail ++ on memory constrained systems. Because of the way Vmalloc works, ++ Vmalloc can succeed when kmalloc fails. Specifying this option ++ will make SquashFS always use Vmalloc to allocate the ++ fragment cache memory. ++ ++ If unsure, say N. ++ + config VXFS_FS + tristate "FreeVxFS file system support (VERITAS VxFS(TM) compatible)" + depends on BLOCK +@@ -2057,3 +2122,4 @@ source "fs/dlm/Kconfig" + + endmenu + ++source "fs/aufs/Kconfig" +diff --git a/fs/Makefile b/fs/Makefile +index 9edf411..557766f 100644 +--- a/fs/Makefile ++++ b/fs/Makefile +@@ -68,6 +68,7 @@ obj-$(CONFIG_JBD) += jbd/ + obj-$(CONFIG_JBD2) += jbd2/ + obj-$(CONFIG_EXT2_FS) += ext2/ + obj-$(CONFIG_CRAMFS) += cramfs/ ++obj-$(CONFIG_SQUASHFS) += squashfs/ + obj-$(CONFIG_RAMFS) += ramfs/ + obj-$(CONFIG_HUGETLBFS) += hugetlbfs/ + obj-$(CONFIG_CODA_FS) += coda/ +@@ -114,3 +115,4 @@ obj-$(CONFIG_HPPFS) += hppfs/ + obj-$(CONFIG_DEBUG_FS) += debugfs/ + obj-$(CONFIG_OCFS2_FS) += ocfs2/ + obj-$(CONFIG_GFS2_FS) += gfs2/ ++obj-$(CONFIG_AUFS) += aufs/ +diff --git a/fs/aufs/Kconfig b/fs/aufs/Kconfig +new file mode 100644 +index 0000000..3a2121c +--- /dev/null ++++ b/fs/aufs/Kconfig +@@ -0,0 +1,73 @@ ++config AUFS ++ tristate "Another unionfs" ++ help ++ Aufs is a stackable unification filesystem such as Unionfs, ++ which unifies several directories and provides a merged single ++ directory. ++ In the early days, aufs was entirely re-designed and ++ re-implemented Unionfs Version 1.x series. After many original ++ ideas, approaches and improvements, it becomes totally ++ different from Unionfs while keeping the basic features. ++ See Unionfs for the basic features. ++ ++if AUFS ++comment "These options are generated automatically for "#UTS_RELEASE ++ ++config AUFS_FAKE_DM ++ bool "Use simplified (fake) nameidata" ++ depends on AUFS ++ default y ++ help ++ Faking nameidata (VFS internal data), you can get better performance ++ in some cases. ++ ++choice ++ prompt "Maximum number of branches" ++ depends on AUFS ++ default AUFS_BRANCH_MAX_127 ++ help ++ Specifies the maximum number of branches (or member directories) in a single aufs. The larger value consumes more system resources and has an impact to performance. ++config AUFS_BRANCH_MAX_127 ++ bool "127" ++ help ++ Specifies the maximum number of branches (or member directories) in a single aufs. The larger value consumes more system resources and has an impact to performance. ++config AUFS_BRANCH_MAX_511 ++ bool "511" ++ help ++ Specifies the maximum number of branches (or member directories) in a single aufs. The larger value consumes more system resources and has an impact to performance. ++config AUFS_BRANCH_MAX_1023 ++ bool "1023" ++ help ++ Specifies the maximum number of branches (or member directories) in a single aufs. The larger value consumes more system resources and has an impact to performance. 
++ ++config AUFS_BRANCH_MAX_32767 ++ bool "32767" ++ help ++ Specifies the maximum number of branches (or member directories) in a single aufs. The larger value consumes more system resources and has an impact to performance. ++endchoice ++config AUFS_DEBUG ++ bool "Debug aufs" ++ depends on AUFS ++ default y ++ help ++ Enable this to compile aufs internal debug code. ++ The performance will be damaged. ++ ++config AUFS_COMPAT ++ bool "Compatibility with Unionfs (obsolete)" ++ depends on AUFS ++ default n ++ help ++ This makes aufs compatible with unionfs-style mount options and some ++ behaviours. ++ The dirs= mount option and =nfsro branch permission flag are always ++ interpreted as br: mount option and =ro flag respectively. The ++ 'debug', 'delete' and 'imap' mount options are ignored. ++ If you disable this option, you will get, ++ - aufs issues a warning about the ignored mount options ++ - the default branch permission flag is set. RW for the first branch, ++ and RO for the rests. ++ - the name of a internal file which represents the directory is ++ 'opaque', becomes '.wh..wh..opq' ++ - the 'diropq=w' mount option is set by default ++endif +diff --git a/fs/aufs/Makefile b/fs/aufs/Makefile +new file mode 100755 +index 0000000..0ee3cd0 +--- /dev/null ++++ b/fs/aufs/Makefile +@@ -0,0 +1,18 @@ ++# AUFS Makefile for the Linux 2.6.16 and later ++# $Id: Makefile,v 1.29 2007/04/23 00:59:50 sfjro Exp $ ++ ++obj-$(CONFIG_AUFS) += aufs.o ++aufs-y := module.o super.o sbinfo.o xino.o \ ++ branch.o cpup.o whout.o plink.o wkq.o dcsub.o vfsub.o \ ++ opts.o \ ++ dentry.o dinfo.o \ ++ file.o f_op.o finfo.o \ ++ dir.o vdir.o \ ++ inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o iinfo.o \ ++ misc.o ++#xattr.o ++aufs-$(CONFIG_AUFS_SYSAUFS) += sysaufs.o ++aufs-$(CONFIG_AUFS_HINOTIFY) += hinotify.o ++aufs-$(CONFIG_AUFS_EXPORT) += export.o ++#aufs-$(CONFIG_DEBUGFS) += dbgfs.o ++aufs-$(CONFIG_AUFS_DEBUG) += debug.o +diff --git a/fs/aufs/aufs.h b/fs/aufs/aufs.h +new file mode 100755 +index 0000000..79b3b87 +--- /dev/null ++++ b/fs/aufs/aufs.h +@@ -0,0 +1,64 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: aufs.h,v 1.24 2007/05/14 03:41:51 sfjro Exp $ */ ++ ++#ifndef __AUFS_H__ ++#define __AUFS_H__ ++ ++#ifdef __KERNEL__ ++ ++#include <linux/version.h> ++ ++/* limited support before 2.6.16, curretly 2.6.15 only. 
*/ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) ++#define atomic_long_t atomic_t ++#define atomic_long_set atomic_set ++#define timespec_to_ns(ts) ({(long long)(ts)->tv_sec;}) ++#define D_CHILD d_child ++#else ++#define D_CHILD d_u.d_child ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++#include "debug.h" ++ ++#include "branch.h" ++#include "cpup.h" ++#include "dcsub.h" ++#include "dentry.h" ++#include "dir.h" ++#include "file.h" ++#include "inode.h" ++#include "misc.h" ++#include "module.h" ++#include "opts.h" ++#include "super.h" ++#include "sysaufs.h" ++#include "vfsub.h" ++#include "whout.h" ++#include "wkq.h" ++//#include "xattr.h" ++ ++#if defined(CONFIG_AUFS_MODULE) && !defined(CONFIG_AUFS_KSIZE_PATCH) ++#define ksize(p) (-1U) ++#endif ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_H__ */ +diff --git a/fs/aufs/branch.c b/fs/aufs/branch.c +new file mode 100755 +index 0000000..f1ce008 +--- /dev/null ++++ b/fs/aufs/branch.c +@@ -0,0 +1,818 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: branch.c,v 1.49 2007/05/14 03:38:23 sfjro Exp $ */ ++ ++//#include <linux/fs.h> ++//#include <linux/namei.h> ++#include "aufs.h" ++ ++static void free_branch(struct aufs_branch *br) ++{ ++ TraceEnter(); ++ ++ if (br->br_xino) ++ fput(br->br_xino); ++ dput(br->br_wh); ++ dput(br->br_plink); ++ mntput(br->br_mnt); ++ DEBUG_ON(br_count(br) || atomic_read(&br->br_wh_running)); ++ kfree(br); ++} ++ ++/* ++ * frees all branches ++ */ ++void free_branches(struct aufs_sbinfo *sbinfo) ++{ ++ aufs_bindex_t bmax; ++ struct aufs_branch **br; ++ ++ TraceEnter(); ++ bmax = sbinfo->si_bend + 1; ++ br = sbinfo->si_branch; ++ while (bmax--) ++ free_branch(*br++); ++} ++ ++/* ++ * find the index of a branch which is specified by @br_id. ++ */ ++int find_brindex(struct super_block *sb, aufs_bindex_t br_id) ++{ ++ aufs_bindex_t bindex, bend; ++ ++ TraceEnter(); ++ ++ bend = sbend(sb); ++ for (bindex = 0; bindex <= bend; bindex++) ++ if (sbr_id(sb, bindex) == br_id) ++ return bindex; ++ return -1; ++} ++ ++/* ++ * test if the @br is readonly or not. ++ */ ++int br_rdonly(struct aufs_branch *br) ++{ ++ return ((br->br_mnt->mnt_sb->s_flags & MS_RDONLY) ++ || !br_writable(br->br_perm)) ++ ? -EROFS : 0; ++} ++ ++/* ++ * returns writable branch index, otherwise an error. 
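A note on the branch-selection helpers that follow: the policy here is a fixed "first writable branch wins" scheme. br_rdonly() treats a branch as read-only when either the underlying mount is MS_RDONLY or the branch permission is not writable; find_rw_parent() first tries the last branch (bend) and otherwise returns the first branch, scanning from the parent's start index, that both carries the parent directory and is writable; find_rw_parent_br() then falls back to find_rw_br(), which accepts any writable branch at all, scanned from bend back toward index 0. A minimal standalone model of that policy is sketched below — the struct and function names are invented for illustration and are not the aufs types:

/* Illustrative model only -- simplified stand-ins, not the aufs structures. */
#include <stdio.h>

struct branch {
	int writable;	/* branch mount and permission allow writes */
	int has_parent;	/* the parent directory exists on this branch */
};

/* mirrors find_rw_parent_br(): prefer bend, else first usable, else any writable */
static int pick_writable(const struct branch *br, int bstart, int bend)
{
	int i;

	if (br[bend].has_parent && br[bend].writable)
		return bend;
	for (i = bstart; i <= bend; i++)
		if (br[i].has_parent && br[i].writable)
			return i;
	/* find_rw_br() fallback: ignore the parent, scan from bend down */
	for (i = bend; i >= 0; i--)
		if (br[i].writable)
			return i;
	return -1;	/* -EROFS in the real code */
}

int main(void)
{
	struct branch br[3] = {
		{ .writable = 1, .has_parent = 0 },	/* b0: rw, parent dir missing */
		{ .writable = 0, .has_parent = 1 },	/* b1: ro */
		{ .writable = 0, .has_parent = 1 },	/* b2: ro */
	};

	/* no writable branch carries the parent -> falls back to b0 */
	printf("picked branch %d\n", pick_writable(br, 1, 2));
	return 0;
}

In the fallback case the chosen branch does not yet hold the parent directory, which is presumably where the copy-up machinery elsewhere in this patch (cpup.o in the aufs Makefile) comes in.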
++ * todo: customizable writable-branch-policy ++ */ ++static int find_rw_parent(struct dentry *dentry, aufs_bindex_t bend) ++{ ++ int err; ++ aufs_bindex_t bindex, candidate; ++ struct super_block *sb; ++ struct dentry *parent, *hidden_parent; ++ ++ err = bend; ++ sb = dentry->d_sb; ++ parent = dget_parent(dentry); ++#if 1 // branch policy ++ hidden_parent = au_h_dptr_i(parent, bend); ++ if (hidden_parent && !br_rdonly(stobr(sb, bend))) ++ goto out; /* success */ ++#endif ++ ++ candidate = -1; ++ for (bindex = dbstart(parent); bindex <= bend; bindex++) { ++ hidden_parent = au_h_dptr_i(parent, bindex); ++ if (hidden_parent && !br_rdonly(stobr(sb, bindex))) { ++#if 0 // branch policy ++ if (candidate == -1) ++ candidate = bindex; ++ if (!au_test_perm(hidden_parent->d_inode, MAY_WRITE)) ++ return bindex; ++#endif ++ err = bindex; ++ goto out; /* success */ ++ } ++ } ++#if 0 // branch policy ++ err = candidate; ++ if (candidate != -1) ++ goto out; /* success */ ++#endif ++ err = -EROFS; ++ ++ out: ++ dput(parent); ++ return err; ++} ++ ++int find_rw_br(struct super_block *sb, aufs_bindex_t bend) ++{ ++ aufs_bindex_t bindex; ++ ++ for (bindex = bend; bindex >= 0; bindex--) ++ if (!br_rdonly(stobr(sb, bindex))) ++ return bindex; ++ return -EROFS; ++} ++ ++int find_rw_parent_br(struct dentry *dentry, aufs_bindex_t bend) ++{ ++ int err; ++ ++ err = find_rw_parent(dentry, bend); ++ if (err >= 0) ++ return err; ++ return find_rw_br(dentry->d_sb, bend); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * test if two hidden_dentries have overlapping branches. ++ */ ++//todo: try is_subdir() ++static int do_is_overlap(struct super_block *sb, struct dentry *hidden_d1, ++ struct dentry *hidden_d2) ++{ ++ struct dentry *d; ++ ++ d = hidden_d1; ++ do { ++ if (unlikely(d == hidden_d2)) ++ return 1; ++ d = d->d_parent; // dget_parent() ++ } while (!IS_ROOT(d)); ++ ++ return (d == hidden_d2); ++} ++ ++#if defined(CONFIG_BLK_DEV_LOOP) || defined(CONFIG_BLK_DEV_LOOP_MODULE) ++#include <linux/loop.h> ++static int is_overlap_loopback(struct super_block *sb, struct dentry *hidden_d1, ++ struct dentry *hidden_d2) ++{ ++ struct inode *hidden_inode; ++ struct loop_device *l; ++ ++ hidden_inode = hidden_d1->d_inode; ++ if (MAJOR(hidden_inode->i_sb->s_dev) != LOOP_MAJOR) ++ return 0; ++ ++ l = hidden_inode->i_sb->s_bdev->bd_disk->private_data; ++ hidden_d1 = l->lo_backing_file->f_dentry; ++ if (unlikely(hidden_d1->d_sb == sb)) ++ return 1; ++ return do_is_overlap(sb, hidden_d1, hidden_d2); ++} ++#else ++#define is_overlap_loopback(sb, hidden_d1, hidden_d2) 0 ++#endif ++ ++static int is_overlap(struct super_block *sb, struct dentry *hidden_d1, ++ struct dentry *hidden_d2) ++{ ++ LKTRTrace("d1 %.*s, d2 %.*s\n", DLNPair(hidden_d1), DLNPair(hidden_d2)); ++ if (unlikely(hidden_d1 == hidden_d2)) ++ return 1; ++ return do_is_overlap(sb, hidden_d1, hidden_d2) ++ || do_is_overlap(sb, hidden_d2, hidden_d1) ++ || is_overlap_loopback(sb, hidden_d1, hidden_d2) ++ || is_overlap_loopback(sb, hidden_d2, hidden_d1); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int init_br_wh(struct super_block *sb, aufs_bindex_t bindex, ++ struct aufs_branch *br, int new_perm, ++ struct dentry *h_root, struct vfsmount *h_mnt) ++{ ++ int err, old_perm; ++ struct inode *dir = sb->s_root->d_inode, ++ *h_dir = h_root->d_inode; ++ const int new = (bindex < 0); ++ ++ LKTRTrace("b%d, new_perm %d\n", bindex, new_perm); ++ ++ if (new) ++ hi_lock_parent(h_dir); 
++ else ++ hdir_lock(h_dir, dir, bindex); ++ ++ br_wh_write_lock(br); ++ old_perm = br->br_perm; ++ br->br_perm = new_perm; ++ err = init_wh(h_root, br, au_do_nfsmnt(h_mnt), sb); ++ br->br_perm = old_perm; ++ br_wh_write_unlock(br); ++ ++ if (new) ++ i_unlock(h_dir); ++ else ++ hdir_unlock(h_dir, dir, bindex); ++ ++ TraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * returns a newly allocated branch. @new_nbranch is a number of branches ++ * after adding a branch. ++ */ ++static struct aufs_branch *alloc_addbr(struct super_block *sb, int new_nbranch) ++{ ++ struct aufs_branch **branchp, *add_branch; ++ int sz; ++ void *p; ++ struct dentry *root; ++ struct inode *inode; ++ struct aufs_hinode *hinodep; ++ struct aufs_hdentry *hdentryp; ++ ++ LKTRTrace("new_nbranch %d\n", new_nbranch); ++ SiMustWriteLock(sb); ++ root = sb->s_root; ++ DiMustWriteLock(root); ++ inode = root->d_inode; ++ IiMustWriteLock(inode); ++ ++ add_branch = kmalloc(sizeof(*add_branch), GFP_KERNEL); ++ //if (LktrCond) {kfree(add_branch); add_branch = NULL;} ++ if (unlikely(!add_branch)) ++ goto out; ++ ++ sz = sizeof(*branchp) * (new_nbranch - 1); ++ if (unlikely(!sz)) ++ sz = sizeof(*branchp); ++ p = stosi(sb)->si_branch; ++ branchp = au_kzrealloc(p, sz, sizeof(*branchp) * new_nbranch, ++ GFP_KERNEL); ++ //if (LktrCond) branchp = NULL; ++ if (unlikely(!branchp)) ++ goto out; ++ stosi(sb)->si_branch = branchp; ++ ++ sz = sizeof(*hdentryp) * (new_nbranch - 1); ++ if (unlikely(!sz)) ++ sz = sizeof(*hdentryp); ++ p = dtodi(root)->di_hdentry; ++ hdentryp = au_kzrealloc(p, sz, sizeof(*hdentryp) * new_nbranch, ++ GFP_KERNEL); ++ //if (LktrCond) hdentryp = NULL; ++ if (unlikely(!hdentryp)) ++ goto out; ++ dtodi(root)->di_hdentry = hdentryp; ++ ++ sz = sizeof(*hinodep) * (new_nbranch - 1); ++ if (unlikely(!sz)) ++ sz = sizeof(*hinodep); ++ p = itoii(inode)->ii_hinode; ++ hinodep = au_kzrealloc(p, sz, sizeof(*hinodep) * new_nbranch, ++ GFP_KERNEL); ++ //if (LktrCond) hinodep = NULL; // unavailable test ++ if (unlikely(!hinodep)) ++ goto out; ++ itoii(inode)->ii_hinode = hinodep; ++ return add_branch; /* success */ ++ ++ out: ++ kfree(add_branch); ++ TraceErr(-ENOMEM); ++ return ERR_PTR(-ENOMEM); ++} ++ ++/* ++ * test if the branch permission is legal or not. 
++ */ ++static int test_br(struct super_block *sb, struct inode *inode, int brperm, ++ char *path) ++{ ++ int err; ++ ++ err = 0; ++ if (unlikely(br_writable(brperm) && IS_RDONLY(inode))) { ++ Err("write permission for readonly fs or inode, %s\n", path); ++ err = -EINVAL; ++ } ++ ++ TraceErr(err); ++ return err; ++} ++ ++/* ++ * retunrs,,, ++ * 0: success, the caller will add it ++ * plus: success, it is already unified, the caller should ignore it ++ * minus: error ++ */ ++static int test_add(struct super_block *sb, struct opt_add *add, int remount) ++{ ++ int err; ++ struct dentry *root; ++ struct inode *inode, *hidden_inode; ++ aufs_bindex_t bend, bindex; ++ ++ LKTRTrace("%s, remo%d\n", add->path, remount); ++ ++ root = sb->s_root; ++ if (unlikely(au_find_dbindex(root, add->nd.dentry) != -1)) { ++ err = 1; ++ if (!remount) { ++ err = -EINVAL; ++ Err("%s duplicated\n", add->path); ++ } ++ goto out; ++ } ++ ++ err = -ENOSPC; //-E2BIG; ++ bend = sbend(sb); ++ //if (LktrCond) bend = AUFS_BRANCH_MAX; ++ if (unlikely(AUFS_BRANCH_MAX <= add->bindex ++ || AUFS_BRANCH_MAX - 1 <= bend)) { ++ Err("number of branches exceeded %s\n", add->path); ++ goto out; ++ } ++ ++ err = -EDOM; ++ if (unlikely(add->bindex < 0 || bend + 1 < add->bindex)) { ++ Err("bad index %d\n", add->bindex); ++ goto out; ++ } ++ ++ inode = add->nd.dentry->d_inode; ++ DEBUG_ON(!inode || !S_ISDIR(inode->i_mode)); ++ err = -ENOENT; ++ if (unlikely(!inode->i_nlink)) { ++ Err("no existence %s\n", add->path); ++ goto out; ++ } ++ ++ err = -EINVAL; ++ if (unlikely(inode->i_sb == sb)) { ++ Err("%s must be outside\n", add->path); ++ goto out; ++ } ++ ++#if 1 //ndef CONFIG_AUFS_ROBR ++ if (unlikely(au_is_aufs(inode->i_sb) ++ || !strcmp(au_sbtype(inode->i_sb), "unionfs"))) { ++ Err("nested " AUFS_NAME " %s\n", add->path); ++ goto out; ++ } ++#endif ++ ++#ifdef AuNoNfsBranch ++ if (unlikely(au_is_nfs(inode->i_sb))) { ++ Err(AuNoNfsBranchMsg ". 
%s\n", add->path); ++ goto out; ++ } ++#endif ++ ++ err = test_br(sb, add->nd.dentry->d_inode, add->perm, add->path); ++ if (unlikely(err)) ++ goto out; ++ ++ if (unlikely(bend == -1)) ++ return 0; /* success */ ++ ++ hidden_inode = au_h_dptr(root)->d_inode; ++ if (unlikely(au_flag_test(sb, AuFlag_WARN_PERM) ++ && ((hidden_inode->i_mode & S_IALLUGO) ++ != (inode->i_mode & S_IALLUGO) ++ || hidden_inode->i_uid != inode->i_uid ++ || hidden_inode->i_gid != inode->i_gid))) ++ Warn("uid/gid/perm %s %u/%u/0%o, %u/%u/0%o\n", ++ add->path, ++ inode->i_uid, inode->i_gid, (inode->i_mode & S_IALLUGO), ++ hidden_inode->i_uid, hidden_inode->i_gid, ++ (hidden_inode->i_mode & S_IALLUGO)); ++ ++ err = -EINVAL; ++ for (bindex = 0; bindex <= bend; bindex++) ++ if (unlikely(is_overlap(sb, add->nd.dentry, ++ au_h_dptr_i(root, bindex)))) { ++ Err("%s is overlapped\n", add->path); ++ goto out; ++ } ++ err = 0; ++ ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++int br_add(struct super_block *sb, struct opt_add *add, int remount) ++{ ++ int err, sz; ++ aufs_bindex_t bend, add_bindex; ++ struct dentry *root; ++ struct aufs_iinfo *iinfo; ++ struct aufs_sbinfo *sbinfo; ++ struct aufs_dinfo *dinfo; ++ struct inode *root_inode; ++ unsigned long long maxb; ++ struct aufs_branch **branchp, *add_branch; ++ struct aufs_hdentry *hdentryp; ++ struct aufs_hinode *hinodep; ++ ++ LKTRTrace("b%d, %s, 0x%x, %.*s\n", add->bindex, add->path, ++ add->perm, DLNPair(add->nd.dentry)); ++ SiMustWriteLock(sb); ++ root = sb->s_root; ++ DiMustWriteLock(root); ++ root_inode = root->d_inode; ++ IMustLock(root_inode); ++ IiMustWriteLock(root_inode); ++ ++ err = test_add(sb, add, remount); ++ if (unlikely(err < 0)) ++ goto out; ++ if (unlikely(err)) ++ return 0; /* success */ ++ ++ bend = sbend(sb); ++ add_branch = alloc_addbr(sb, bend + 2); ++ err = PTR_ERR(add_branch); ++ if (IS_ERR(add_branch)) ++ goto out; ++ ++ err = 0; ++ rw_init_nolock(&add_branch->br_wh_rwsem); ++ add_branch->br_wh = add_branch->br_plink = NULL; ++ if (unlikely(br_writable(add->perm))) { ++ err = init_br_wh(sb, /*bindex*/-1, add_branch, add->perm, ++ add->nd.dentry, add->nd.mnt); ++ if (unlikely(err)) { ++ kfree(add_branch); ++ goto out; ++ } ++ } ++ add_branch->br_xino = NULL; ++ add_branch->br_mnt = mntget(add->nd.mnt); ++ atomic_set(&add_branch->br_wh_running, 0); ++ add_branch->br_id = new_br_id(sb); ++ add_branch->br_perm = add->perm; ++ atomic_set(&add_branch->br_count, 0); ++ ++ sbinfo = stosi(sb); ++ dinfo = dtodi(root); ++ iinfo = itoii(root_inode); ++ ++ add_bindex = add->bindex; ++ sz = sizeof(*(sbinfo->si_branch)) * (bend + 1 - add_bindex); ++ branchp = sbinfo->si_branch + add_bindex; ++ memmove(branchp + 1, branchp, sz); ++ *branchp = add_branch; ++ sz = sizeof(*hdentryp) * (bend + 1 - add_bindex); ++ hdentryp = dinfo->di_hdentry + add_bindex; ++ memmove(hdentryp + 1, hdentryp, sz); ++ hdentryp->hd_dentry = NULL; ++ sz = sizeof(*hinodep) * (bend + 1 - add_bindex); ++ hinodep = iinfo->ii_hinode + add_bindex; ++ memmove(hinodep + 1, hinodep, sz); ++ hinodep->hi_inode = NULL; ++ hinodep->hi_notify = NULL; ++ ++ sbinfo->si_bend++; ++ dinfo->di_bend++; ++ iinfo->ii_bend++; ++ if (unlikely(bend == -1)) { ++ dinfo->di_bstart = 0; ++ iinfo->ii_bstart = 0; ++ } ++ set_h_dptr(root, add_bindex, dget(add->nd.dentry)); ++ set_h_iptr(root_inode, add_bindex, igrab(add->nd.dentry->d_inode), 0); ++ if (!add_bindex) ++ au_cpup_attr_all(root_inode); ++ else ++ au_add_nlink(root_inode, add->nd.dentry->d_inode); ++ maxb = add->nd.dentry->d_sb->s_maxbytes; ++ if 
(sb->s_maxbytes < maxb) ++ sb->s_maxbytes = maxb; ++ ++ if (au_flag_test(sb, AuFlag_XINO)) { ++ struct file *base_file = stobr(sb, 0)->br_xino; ++ if (!add_bindex) ++ base_file = stobr(sb, 1)->br_xino; ++ err = xino_init(sb, add_bindex, base_file, /*do_test*/1); ++ if (unlikely(err)) { ++ DEBUG_ON(add_branch->br_xino); ++ Err("ignored xino err %d, force noxino\n", err); ++ err = 0; ++ au_flag_clr(sb, AuFlag_XINO); ++ } ++ } ++ ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * test if the branch is deletable or not. ++ */ ++static int test_children_busy(struct dentry *root, aufs_bindex_t bindex) ++{ ++ int err, i, j, sigen; ++ struct au_dcsub_pages dpages; ++ ++ LKTRTrace("b%d\n", bindex); ++ SiMustWriteLock(root->d_sb); ++ DiMustWriteLock(root); ++ ++ err = au_dpages_init(&dpages, GFP_KERNEL); ++ if (unlikely(err)) ++ goto out; ++ err = au_dcsub_pages(&dpages, root, NULL, NULL); ++ if (unlikely(err)) ++ goto out_dpages; ++ ++ sigen = au_sigen(root->d_sb); ++ DiMustNoWaiters(root); ++ IiMustNoWaiters(root->d_inode); ++ di_write_unlock(root); ++ for (i = 0; !err && i < dpages.ndpage; i++) { ++ struct au_dpage *dpage; ++ dpage = dpages.dpages + i; ++ for (j = 0; !err && j < dpage->ndentry; j++) { ++ struct dentry *d; ++ ++ d = dpage->dentries[j]; ++ if (au_digen(d) == sigen) ++ di_read_lock_child(d, AUFS_I_RLOCK); ++ else { ++ di_write_lock_child(d); ++ err = au_reval_dpath(d, sigen); ++ if (!err) ++ di_downgrade_lock(d, AUFS_I_RLOCK); ++ else { ++ di_write_unlock(d); ++ break; ++ } ++ } ++ ++ if (au_h_dptr_i(d, bindex) ++ && (!S_ISDIR(d->d_inode->i_mode) ++ || dbstart(d) == dbend(d))) ++ err = -EBUSY; ++ di_read_unlock(d, AUFS_I_RLOCK); ++ if (err) ++ LKTRTrace("%.*s\n", DLNPair(d)); ++ } ++ } ++ di_write_lock_child(root); /* aufs_write_lock() calls ..._child() */ ++ ++ out_dpages: ++ au_dpages_free(&dpages); ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++int br_del(struct super_block *sb, struct opt_del *del, int remount) ++{ ++ int err, do_wh, rerr; ++ struct dentry *root; ++ struct inode *inode, *hidden_dir; ++ aufs_bindex_t bindex, bend, br_id; ++ struct aufs_sbinfo *sbinfo; ++ struct aufs_dinfo *dinfo; ++ struct aufs_iinfo *iinfo; ++ struct aufs_branch *br; ++ ++ LKTRTrace("%s, %.*s\n", del->path, DLNPair(del->h_root)); ++ SiMustWriteLock(sb); ++ root = sb->s_root; ++ DiMustWriteLock(root); ++ inode = root->d_inode; ++ IiMustWriteLock(inode); ++ ++ bindex = au_find_dbindex(root, del->h_root); ++ if (unlikely(bindex < 0)) { ++ if (remount) ++ return 0; /* success */ ++ err = -ENOENT; ++ Err("%s no such branch\n", del->path); ++ goto out; ++ } ++ LKTRTrace("bindex b%d\n", bindex); ++ ++ err = -EBUSY; ++ bend = sbend(sb); ++ br = stobr(sb, bindex); ++ if (unlikely(!bend || br_count(br))) { ++ LKTRTrace("bend %d, br_count %d\n", bend, br_count(br)); ++ goto out; ++ } ++ ++ do_wh = 0; ++ hidden_dir = del->h_root->d_inode; ++ if (unlikely(br->br_wh || br->br_plink)) { ++#if 0 ++ /* remove whiteout base */ ++ err = init_br_wh(sb, bindex, br, AuBr_RO, del->h_root, ++ br->br_mnt); ++ if (unlikely(err)) ++ goto out; ++#else ++ dput(br->br_wh); ++ dput(br->br_plink); ++ br->br_wh = br->br_plink = NULL; ++#endif ++ do_wh = 1; ++ } ++ ++ err = test_children_busy(root, bindex); ++ if (unlikely(err)) { ++ if (unlikely(do_wh)) ++ goto out_wh; ++ goto out; ++ } ++ ++ err = 0; ++ sbinfo = stosi(sb); ++ dinfo = dtodi(root); ++ iinfo = itoii(inode); ++ ++ dput(au_h_dptr_i(root, bindex)); ++ 
aufs_hiput(iinfo->ii_hinode + bindex); ++ br_id = br->br_id; ++ free_branch(br); ++ ++ //todo: realloc and shrink memeory ++ if (bindex < bend) { ++ const aufs_bindex_t n = bend - bindex; ++ struct aufs_branch **brp; ++ struct aufs_hdentry *hdp; ++ struct aufs_hinode *hip; ++ ++ brp = sbinfo->si_branch + bindex; ++ memmove(brp, brp + 1, sizeof(*brp) * n); ++ hdp = dinfo->di_hdentry + bindex; ++ memmove(hdp, hdp + 1, sizeof(*hdp) * n); ++ hip = iinfo->ii_hinode + bindex; ++ memmove(hip, hip + 1, sizeof(*hip) * n); ++ } ++ sbinfo->si_branch[0 + bend] = NULL; ++ dinfo->di_hdentry[0 + bend].hd_dentry = NULL; ++ iinfo->ii_hinode[0 + bend].hi_inode = NULL; ++ iinfo->ii_hinode[0 + bend].hi_notify = NULL; ++ ++ sbinfo->si_bend--; ++ dinfo->di_bend--; ++ iinfo->ii_bend--; ++ if (!bindex) ++ au_cpup_attr_all(inode); ++ else ++ au_sub_nlink(inode, del->h_root->d_inode); ++ if (au_flag_test(sb, AuFlag_PLINK)) ++ half_refresh_plink(sb, br_id); ++ ++ if (sb->s_maxbytes == del->h_root->d_sb->s_maxbytes) { ++ bend--; ++ sb->s_maxbytes = 0; ++ for (bindex = 0; bindex <= bend; bindex++) { ++ unsigned long long maxb; ++ maxb = sbr_sb(sb, bindex)->s_maxbytes; ++ if (sb->s_maxbytes < maxb) ++ sb->s_maxbytes = maxb; ++ } ++ } ++ goto out; /* success */ ++ ++ out_wh: ++ /* revert */ ++ rerr = init_br_wh(sb, bindex, br, br->br_perm, del->h_root, br->br_mnt); ++ if (rerr) ++ Warn("failed re-creating base whiteout, %s. (%d)\n", ++ del->path, rerr); ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++static int do_need_sigen_inc(int a, int b) ++{ ++ return (br_whable(a) && !br_whable(b)); ++} ++ ++static int need_sigen_inc(int old, int new) ++{ ++ return (do_need_sigen_inc(old, new) ++ || do_need_sigen_inc(new, old)); ++} ++ ++int br_mod(struct super_block *sb, struct opt_mod *mod, int remount, ++ int *do_update) ++{ ++ int err; ++ struct dentry *root; ++ aufs_bindex_t bindex; ++ struct aufs_branch *br; ++ struct inode *hidden_dir; ++ ++ LKTRTrace("%s, %.*s, 0x%x\n", ++ mod->path, DLNPair(mod->h_root), mod->perm); ++ SiMustWriteLock(sb); ++ root = sb->s_root; ++ DiMustWriteLock(root); ++ IiMustWriteLock(root->d_inode); ++ ++ bindex = au_find_dbindex(root, mod->h_root); ++ if (unlikely(bindex < 0)) { ++ if (remount) ++ return 0; /* success */ ++ err = -ENOENT; ++ Err("%s no such branch\n", mod->path); ++ goto out; ++ } ++ LKTRTrace("bindex b%d\n", bindex); ++ ++ hidden_dir = mod->h_root->d_inode; ++ err = test_br(sb, hidden_dir, mod->perm, mod->path); ++ if (unlikely(err)) ++ goto out; ++ ++ br = stobr(sb, bindex); ++ if (unlikely(br->br_perm == mod->perm)) ++ return 0; /* success */ ++ ++ if (br_writable(br->br_perm)) { ++#if 1 ++ /* remove whiteout base */ ++ //todo: mod->perm? ++ err = init_br_wh(sb, bindex, br, AuBr_RO, mod->h_root, ++ br->br_mnt); ++ if (unlikely(err)) ++ goto out; ++#else ++ dput(br->br_wh); ++ dput(br->br_plink); ++ br->br_wh = br->br_plink = NULL; ++#endif ++ ++ if (!br_writable(mod->perm)) { ++ /* rw --> ro, file might be mmapped */ ++ struct file *file, *hf; ++ ++#if 1 // test here ++ DiMustNoWaiters(root); ++ IiMustNoWaiters(root->d_inode); ++ di_write_unlock(root); ++ ++ // no need file_list_lock() since sbinfo is locked ++ //file_list_lock(); ++ list_for_each_entry(file, &sb->s_files, f_u.fu_list) { ++ LKTRTrace("%.*s\n", DLNPair(file->f_dentry)); ++ fi_read_lock(file); ++ if (!S_ISREG(file->f_dentry->d_inode->i_mode) ++ || !(file->f_mode & FMODE_WRITE) ++ || fbstart(file) != bindex) { ++ FiMustNoWaiters(file); ++ fi_read_unlock(file); ++ continue; ++ } ++ ++ // todo: already flushed? 
++ hf = au_h_fptr(file); ++ hf->f_flags = au_file_roflags(hf->f_flags); ++ hf->f_mode &= ~FMODE_WRITE; ++ FiMustNoWaiters(file); ++ fi_read_unlock(file); ++ } ++ //file_list_unlock(); ++ ++ /* aufs_write_lock() calls ..._child() */ ++ di_write_lock_child(root); ++#endif ++ } ++ } ++ ++ *do_update |= need_sigen_inc(br->br_perm, mod->perm); ++ br->br_perm = mod->perm; ++ return err; /* success */ ++ ++ out: ++ TraceErr(err); ++ return err; ++} +diff --git a/fs/aufs/branch.h b/fs/aufs/branch.h +new file mode 100755 +index 0000000..2557836 +--- /dev/null ++++ b/fs/aufs/branch.h +@@ -0,0 +1,235 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: branch.h,v 1.30 2007/05/14 03:41:51 sfjro Exp $ */ ++ ++#ifndef __AUFS_BRANCH_H__ ++#define __AUFS_BRANCH_H__ ++ ++#ifdef __KERNEL__ ++ ++#include <linux/fs.h> ++#include <linux/mount.h> ++#include <linux/version.h> ++#include <linux/aufs_type.h> ++#include "misc.h" ++#include "super.h" ++ ++/* protected by superblock rwsem */ ++struct aufs_branch { ++ struct file *br_xino; ++ readf_t br_xino_read; ++ writef_t br_xino_write; ++ ++ aufs_bindex_t br_id; ++ ++ int br_perm; ++ struct vfsmount *br_mnt; ++ atomic_t br_count; ++ ++ /* whiteout base */ ++ struct aufs_rwsem br_wh_rwsem; ++ struct dentry *br_wh; ++ atomic_t br_wh_running; ++ ++ /* pseudo-link dir */ ++ struct dentry *br_plink; ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* branch permission and attribute */ ++enum { ++ AuBr_RW, /* writable, linkable wh */ ++ AuBr_RO, /* readonly, no wh */ ++ AuBr_RR, /* natively readonly, no wh */ ++ ++ AuBr_RWNoLinkWH, /* un-linkable whiteouts */ ++ ++ AuBr_ROWH, ++ AuBr_RRWH, /* whiteout-able */ ++ ++ AuBr_Last ++}; ++ ++static inline int br_writable(int brperm) ++{ ++ return (brperm == AuBr_RW ++ || brperm == AuBr_RWNoLinkWH); ++} ++ ++static inline int br_whable(int brperm) ++{ ++ return (brperm == AuBr_RW ++ || brperm == AuBr_ROWH ++ || brperm == AuBr_RRWH); ++} ++ ++static inline int br_linkable_wh(int brperm) ++{ ++ return (brperm == AuBr_RW); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++#define _AuNoNfsBranchMsg "NFS branch is not supported" ++#if LINUX_VERSION_CODE == KERNEL_VERSION(2,6,15) ++#define AuNoNfsBranch ++#define AuNoNfsBranchMsg _AuNoNfsBranchMsg ++#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,19) \ ++ && !defined(CONFIG_AUFS_LHASH_PATCH) ++#define AuNoNfsBranch ++#define AuNoNfsBranchMsg _AuNoNfsBranchMsg \ ++ ", try lhash.patch and CONFIG_AUFS_LHASH_PATCH" ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct aufs_sbinfo; ++void free_branches(struct aufs_sbinfo *sinfo); ++int br_rdonly(struct aufs_branch *br); ++int find_brindex(struct 
super_block *sb, aufs_bindex_t br_id); ++int find_rw_br(struct super_block *sb, aufs_bindex_t bend); ++int find_rw_parent_br(struct dentry *dentry, aufs_bindex_t bend); ++struct opt_add; ++int br_add(struct super_block *sb, struct opt_add *add, int remount); ++struct opt_del; ++int br_del(struct super_block *sb, struct opt_del *del, int remount); ++struct opt_mod; ++int br_mod(struct super_block *sb, struct opt_mod *mod, int remount, ++ int *do_update); ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline int br_count(struct aufs_branch *br) ++{ ++ return atomic_read(&br->br_count); ++} ++ ++static inline void br_get(struct aufs_branch *br) ++{ ++ atomic_inc(&br->br_count); ++} ++ ++static inline void br_put(struct aufs_branch *br) ++{ ++ atomic_dec(&br->br_count); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* Superblock to branch */ ++static inline aufs_bindex_t sbr_id(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ return stobr(sb, bindex)->br_id; ++} ++ ++static inline ++struct vfsmount *sbr_mnt(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ return stobr(sb, bindex)->br_mnt; ++} ++ ++static inline ++struct super_block *sbr_sb(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ return sbr_mnt(sb, bindex)->mnt_sb; ++} ++ ++#if 0 ++static inline int sbr_count(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ return br_count(stobr(sb, bindex)); ++} ++ ++static inline void sbr_get(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ br_get(stobr(sb, bindex)); ++} ++#endif ++ ++static inline void sbr_put(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ br_put(stobr(sb, bindex)); ++} ++ ++static inline int sbr_perm(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ return stobr(sb, bindex)->br_perm; ++} ++ ++static inline int sbr_is_whable(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ return br_whable(sbr_perm(sb, bindex)); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_AUFS_LHASH_PATCH ++static inline struct vfsmount *au_do_nfsmnt(struct vfsmount *h_mnt) ++{ ++ if (!au_is_nfs(h_mnt->mnt_sb)) ++ return NULL; ++ return h_mnt; ++} ++ ++/* it doesn't mntget() */ ++static inline ++struct vfsmount *au_nfsmnt(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ return au_do_nfsmnt(sbr_mnt(sb, bindex)); ++} ++#else ++static inline struct vfsmount *au_do_nfsmnt(struct vfsmount *h_mnt) ++{ ++ return NULL; ++} ++ ++static inline ++struct vfsmount *au_nfsmnt(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ return NULL; ++} ++#endif /* CONFIG_AUFS_LHASH_PATCH */ ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * br_wh_read_lock, br_wh_write_lock ++ * br_wh_read_unlock, br_wh_write_unlock, br_wh_downgrade_lock ++ */ ++SimpleRwsemFuncs(br_wh, struct aufs_branch *br, br->br_wh_rwsem); ++ ++/* to debug easier, do not make them inlined functions */ ++#define BrWhMustReadLock(br) do { \ ++ /* SiMustAnyLock(sb); */ \ ++ RwMustReadLock(&(br)->br_wh_rwsem); \ ++} while (0) ++ ++#define BrWhMustWriteLock(br) do { \ ++ /* SiMustAnyLock(sb); */ \ ++ RwMustWriteLock(&(br)->br_wh_rwsem); \ ++} while (0) ++ ++#define BrWhMustAnyLock(br) do { \ ++ /* SiMustAnyLock(sb); */ \ ++ RwMustAnyLock(&(br)->br_wh_rwsem); \ ++} while (0) ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_BRANCH_H__ */ +diff --git a/fs/aufs/cpup.c b/fs/aufs/cpup.c +new file mode 100755 +index 0000000..6636f40 +--- /dev/null 
++++ b/fs/aufs/cpup.c +@@ -0,0 +1,773 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: cpup.c,v 1.37 2007/05/14 03:41:52 sfjro Exp $ */ ++ ++#include <asm/uaccess.h> ++#include "aufs.h" ++ ++/* violent cpup_attr_*() functions don't care inode lock */ ++void au_cpup_attr_timesizes(struct inode *inode) ++{ ++ struct inode *hidden_inode; ++ ++ LKTRTrace("i%lu\n", inode->i_ino); ++ //IMustLock(inode); ++ hidden_inode = au_h_iptr(inode); ++ DEBUG_ON(!hidden_inode); ++ //IMustLock(!hidden_inode); ++ ++ inode->i_atime = hidden_inode->i_atime; ++ inode->i_mtime = hidden_inode->i_mtime; ++ inode->i_ctime = hidden_inode->i_ctime; ++ spin_lock(&inode->i_lock); ++ i_size_write(inode, i_size_read(hidden_inode)); ++ inode->i_blocks = hidden_inode->i_blocks; ++ spin_unlock(&inode->i_lock); ++} ++ ++void au_cpup_attr_nlink(struct inode *inode) ++{ ++ struct inode *h_inode; ++ ++ LKTRTrace("i%lu\n", inode->i_ino); ++ //IMustLock(inode); ++ DEBUG_ON(!inode->i_mode); ++ ++ h_inode = au_h_iptr(inode); ++ inode->i_nlink = h_inode->i_nlink; ++ ++ /* ++ * fewer nlink makes find(1) noisy, but larger nlink doesn't. ++ * it may includes whplink directory. ++ */ ++ if (unlikely(S_ISDIR(h_inode->i_mode))) { ++ aufs_bindex_t bindex, bend; ++ bend = ibend(inode); ++ for (bindex = ibstart(inode) + 1; bindex <= bend; bindex++) { ++ h_inode = au_h_iptr_i(inode, bindex); ++ if (h_inode) ++ au_add_nlink(inode, h_inode); ++ } ++ } ++} ++ ++void au_cpup_attr_changable(struct inode *inode) ++{ ++ struct inode *hidden_inode; ++ ++ LKTRTrace("i%lu\n", inode->i_ino); ++ //IMustLock(inode); ++ hidden_inode = au_h_iptr(inode); ++ DEBUG_ON(!hidden_inode); ++ ++ inode->i_mode = hidden_inode->i_mode; ++ inode->i_uid = hidden_inode->i_uid; ++ inode->i_gid = hidden_inode->i_gid; ++ au_cpup_attr_timesizes(inode); ++ ++ //?? 
++ inode->i_flags = hidden_inode->i_flags; ++} ++ ++void au_cpup_igen(struct inode *inode, struct inode *h_inode) ++{ ++ inode->i_generation = h_inode->i_generation; ++ itoii(inode)->ii_hsb1 = h_inode->i_sb; ++} ++ ++void au_cpup_attr_all(struct inode *inode) ++{ ++ struct inode *hidden_inode; ++ ++ LKTRTrace("i%lu\n", inode->i_ino); ++ //IMustLock(inode); ++ hidden_inode = au_h_iptr(inode); ++ DEBUG_ON(!hidden_inode); ++ ++ au_cpup_attr_changable(inode); ++ if (inode->i_nlink > 0) ++ au_cpup_attr_nlink(inode); ++ ++ switch (inode->i_mode & S_IFMT) { ++ case S_IFBLK: ++ case S_IFCHR: ++ inode->i_rdev = hidden_inode->i_rdev; ++ } ++ inode->i_blkbits = hidden_inode->i_blkbits; ++ au_cpup_attr_blksize(inode, hidden_inode); ++ au_cpup_igen(inode, hidden_inode); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* Note: dt_dentry and dt_hidden_dentry are not dget/dput-ed */ ++ ++/* keep the timestamps of the parent dir when cpup */ ++void dtime_store(struct dtime *dt, struct dentry *dentry, ++ struct dentry *hidden_dentry) ++{ ++ struct inode *inode; ++ ++ TraceEnter(); ++ DEBUG_ON(!dentry || !hidden_dentry || !hidden_dentry->d_inode); ++ ++ dt->dt_dentry = dentry; ++ dt->dt_h_dentry = hidden_dentry; ++ inode = hidden_dentry->d_inode; ++ dt->dt_atime = inode->i_atime; ++ dt->dt_mtime = inode->i_mtime; ++ //smp_mb(); ++} ++ ++// todo: remove extra parameter ++void dtime_revert(struct dtime *dt, int h_parent_is_locked) ++{ ++ struct iattr attr; ++ int err; ++ struct dentry *dentry; ++ ++ LKTRTrace("h_parent locked %d\n", h_parent_is_locked); ++ ++ attr.ia_atime = dt->dt_atime; ++ attr.ia_mtime = dt->dt_mtime; ++ attr.ia_valid = ATTR_FORCE | ATTR_MTIME | ATTR_MTIME_SET ++ | ATTR_ATIME | ATTR_ATIME_SET; ++ //smp_mb(); ++ dentry = NULL; ++ if (!h_parent_is_locked /* && !IS_ROOT(dt->dt_dentry) */) ++ dentry = dt->dt_dentry; ++ err = vfsub_notify_change(dt->dt_h_dentry, &attr, ++ need_dlgt(dt->dt_dentry->d_sb)); ++ if (unlikely(err)) ++ Warn("restoring timestamps failed(%d). ignored\n", err); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int cpup_iattr(struct dentry *hidden_dst, struct dentry *hidden_src, ++ int dlgt) ++{ ++ int err; ++ struct iattr ia; ++ struct inode *hidden_isrc, *hidden_idst; ++ ++ LKTRTrace("%.*s\n", DLNPair(hidden_dst)); ++ hidden_idst = hidden_dst->d_inode; ++ //IMustLock(hidden_idst); ++ hidden_isrc = hidden_src->d_inode; ++ //IMustLock(hidden_isrc); ++ ++ ia.ia_valid = ATTR_FORCE | ATTR_MODE | ATTR_UID | ATTR_GID ++ | ATTR_ATIME | ATTR_MTIME ++ | ATTR_ATIME_SET | ATTR_MTIME_SET; ++ ia.ia_mode = hidden_isrc->i_mode; ++ ia.ia_uid = hidden_isrc->i_uid; ++ ia.ia_gid = hidden_isrc->i_gid; ++ ia.ia_atime = hidden_isrc->i_atime; ++ ia.ia_mtime = hidden_isrc->i_mtime; ++ err = vfsub_notify_change(hidden_dst, &ia, dlgt); ++ //if (LktrCond) err = -1; ++ if (!err) ++ hidden_idst->i_flags = hidden_isrc->i_flags; //?? ++ ++ TraceErr(err); ++ return err; ++} ++ ++/* ++ * to support a sparse file which is opened with O_APPEND, ++ * we need to close the file. 
++ */ ++static int cpup_regular(struct dentry *dentry, aufs_bindex_t bdst, ++ aufs_bindex_t bsrc, loff_t len) ++{ ++ int err, i, sparse; ++ struct super_block *sb; ++ struct inode *hidden_inode; ++ enum {SRC, DST}; ++ struct { ++ aufs_bindex_t bindex; ++ unsigned int flags; ++ struct dentry *dentry; ++ struct file *file; ++ void *label, *label_file; ++ } *h, hidden[] = { ++ { ++ .bindex = bsrc, ++ .flags = O_RDONLY | O_NOATIME | O_LARGEFILE, ++ .file = NULL, ++ .label = &&out, ++ .label_file = &&out_src_file ++ }, ++ { ++ .bindex = bdst, ++ .flags = O_WRONLY | O_NOATIME | O_LARGEFILE, ++ .file = NULL, ++ .label = &&out_src_file, ++ .label_file = &&out_dst_file ++ } ++ }; ++ ++ LKTRTrace("dentry %.*s, bdst %d, bsrc %d, len %lld\n", ++ DLNPair(dentry), bdst, bsrc, len); ++ DEBUG_ON(bsrc <= bdst); ++ DEBUG_ON(!len); ++ sb = dentry->d_sb; ++ DEBUG_ON(test_ro(sb, bdst, dentry->d_inode)); ++ // bsrc branch can be ro/rw. ++ ++ h = hidden; ++ for (i = 0; i < 2; i++, h++) { ++ h->dentry = au_h_dptr_i(dentry, h->bindex); ++ DEBUG_ON(!h->dentry); ++ hidden_inode = h->dentry->d_inode; ++ DEBUG_ON(!hidden_inode || !S_ISREG(hidden_inode->i_mode)); ++ h->file = hidden_open(dentry, h->bindex, h->flags); ++ //if (LktrCond) ++ //{fput(h->file); sbr_put(sb, h->bindex); h->file = ERR_PTR(-1);} ++ err = PTR_ERR(h->file); ++ if (IS_ERR(h->file)) ++ goto *h->label; ++ err = -EINVAL; ++ if (unlikely(!h->file->f_op)) ++ goto *h->label_file; ++ } ++ ++ /* stop updating while we copyup */ ++ IMustLock(hidden[SRC].dentry->d_inode); ++ sparse = 0; ++ err = au_copy_file(hidden[DST].file, hidden[SRC].file, len, sb, ++ &sparse); ++ ++ /* sparse file: update i_blocks next time */ ++ if (unlikely(!err && sparse)) ++ d_drop(dentry); ++ ++ out_dst_file: ++ fput(hidden[DST].file); ++ sbr_put(sb, hidden[DST].bindex); ++ out_src_file: ++ fput(hidden[SRC].file); ++ sbr_put(sb, hidden[SRC].bindex); ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++// unnecessary? ++unsigned int au_flags_cpup(unsigned int init, struct dentry *parent) ++{ ++ if (unlikely(parent && IS_ROOT(parent))) ++ init |= CPUP_LOCKED_GHDIR; ++ return init; ++} ++ ++/* return with hidden dst inode is locked */ ++static int cpup_entry(struct dentry *dentry, aufs_bindex_t bdst, ++ aufs_bindex_t bsrc, loff_t len, unsigned int flags, ++ int dlgt) ++{ ++ int err, isdir, symlen; ++ struct dentry *hidden_src, *hidden_dst, *hidden_parent, *parent; ++ struct inode *hidden_inode, *hidden_dir, *dir; ++ struct dtime dt; ++ umode_t mode; ++ char *sym; ++ mm_segment_t old_fs; ++ const int do_dt = flags & CPUP_DTIME; ++ struct super_block *sb; ++ ++ LKTRTrace("%.*s, i%lu, bdst %d, bsrc %d, len %Ld, flags 0x%x\n", ++ DLNPair(dentry), dentry->d_inode->i_ino, bdst, bsrc, len, ++ flags); ++ sb = dentry->d_sb; ++ DEBUG_ON(bdst >= bsrc || test_ro(sb, bdst, NULL)); ++ // bsrc branch can be ro/rw. 
++ ++ hidden_src = au_h_dptr_i(dentry, bsrc); ++ DEBUG_ON(!hidden_src); ++ hidden_inode = hidden_src->d_inode; ++ DEBUG_ON(!hidden_inode); ++ ++ /* stop refrencing while we are creating */ ++ //parent = dget_parent(dentry); ++ parent = dentry->d_parent; ++ dir = parent->d_inode; ++ hidden_dst = au_h_dptr_i(dentry, bdst); ++ DEBUG_ON(hidden_dst && hidden_dst->d_inode); ++ //hidden_parent = dget_parent(hidden_dst); ++ hidden_parent = hidden_dst->d_parent; ++ hidden_dir = hidden_parent->d_inode; ++ IMustLock(hidden_dir); ++ ++ if (do_dt) ++ dtime_store(&dt, parent, hidden_parent); ++ ++ isdir = 0; ++ mode = hidden_inode->i_mode; ++ switch (mode & S_IFMT) { ++ case S_IFREG: ++ /* stop updating while we are referencing */ ++ IMustLock(hidden_inode); ++ err = vfsub_create(hidden_dir, hidden_dst, mode | S_IWUSR, NULL, ++ dlgt); ++ //if (LktrCond) {vfs_unlink(hidden_dir, hidden_dst); err = -1;} ++ if (!err) { ++ loff_t l = i_size_read(hidden_inode); ++ if (len == -1 || l < len) ++ len = l; ++ if (len) { ++ err = cpup_regular(dentry, bdst, bsrc, len); ++ //if (LktrCond) err = -1; ++ } ++ if (unlikely(err)) { ++ int rerr; ++ rerr = vfsub_unlink(hidden_dir, hidden_dst, ++ dlgt); ++ if (rerr) { ++ IOErr("failed unlinking cpup-ed %.*s" ++ "(%d, %d)\n", ++ DLNPair(hidden_dst), err, rerr); ++ err = -EIO; ++ } ++ } ++ } ++ break; ++ case S_IFDIR: ++ isdir = 1; ++ err = vfsub_mkdir(hidden_dir, hidden_dst, mode, dlgt); ++ //if (LktrCond) {vfs_rmdir(hidden_dir, hidden_dst); err = -1;} ++ if (!err) { ++ /* setattr case: dir is not locked */ ++ if (0 && ibstart(dir) == bdst) ++ au_cpup_attr_nlink(dir); ++ au_cpup_attr_nlink(dentry->d_inode); ++ } ++ break; ++ case S_IFLNK: ++ err = -ENOMEM; ++ sym = __getname(); ++ //if (LktrCond) {__putname(sym); sym = NULL;} ++ if (unlikely(!sym)) ++ break; ++ old_fs = get_fs(); ++ set_fs(KERNEL_DS); ++ err = symlen = hidden_inode->i_op->readlink ++ (hidden_src, (char __user*)sym, PATH_MAX); ++ //if (LktrCond) err = symlen = -1; ++ set_fs(old_fs); ++ if (symlen > 0) { ++ sym[symlen] = 0; ++ err = vfsub_symlink(hidden_dir, hidden_dst, sym, mode, ++ dlgt); ++ //if (LktrCond) ++ //{vfs_unlink(hidden_dir, hidden_dst); err = -1;} ++ } ++ __putname(sym); ++ break; ++ case S_IFCHR: ++ case S_IFBLK: ++ DEBUG_ON(!capable(CAP_MKNOD)); ++ /*FALLTHROUGH*/ ++ case S_IFIFO: ++ case S_IFSOCK: ++ err = vfsub_mknod(hidden_dir, hidden_dst, mode, ++ hidden_inode->i_rdev, dlgt); ++ //if (LktrCond) {vfs_unlink(hidden_dir, hidden_dst); err = -1;} ++ break; ++ default: ++ IOErr("Unknown inode type 0%o\n", mode); ++ err = -EIO; ++ } ++ ++ if (do_dt) ++ dtime_revert(&dt, flags & CPUP_LOCKED_GHDIR); ++ //dput(parent); ++ //dput(hidden_parent); ++ TraceErr(err); ++ return err; ++} ++ ++/* ++ * copyup the @dentry from @bsrc to @bdst. ++ * the caller must set the both of hidden dentries. ++ * @len is for trucating when it is -1 copyup the entire file. 
++ */ ++int cpup_single(struct dentry *dentry, aufs_bindex_t bdst, aufs_bindex_t bsrc, ++ loff_t len, unsigned int flags) ++{ ++ int err, rerr, isdir, dlgt; ++ struct dentry *hidden_src, *hidden_dst, *parent;//, *h_parent; ++ struct inode *dst_inode, *hidden_dir, *inode, *src_inode; ++ struct super_block *sb; ++ aufs_bindex_t old_ibstart; ++ struct dtime dt; ++ ++ LKTRTrace("%.*s, i%lu, bdst %d, bsrc %d, len %Ld, flags 0x%x\n", ++ DLNPair(dentry), dentry->d_inode->i_ino, bdst, bsrc, len, ++ flags); ++ sb = dentry->d_sb; ++ DEBUG_ON(bsrc <= bdst); ++ hidden_dst = au_h_dptr_i(dentry, bdst); ++ DEBUG_ON(!hidden_dst || hidden_dst->d_inode); ++ //h_parent = dget_parent(hidden_dst); ++ //hidden_dir = h_parent->d_inode; ++ hidden_dir = hidden_dst->d_parent->d_inode; ++ IMustLock(hidden_dir); ++ hidden_src = au_h_dptr_i(dentry, bsrc); ++ DEBUG_ON(!hidden_src || !hidden_src->d_inode); ++ inode = dentry->d_inode; ++ IiMustWriteLock(inode); ++ ++ dlgt = need_dlgt(sb); ++ dst_inode = au_h_iptr_i(inode, bdst); ++ if (unlikely(dst_inode)) { ++ if (unlikely(!au_flag_test(sb, AuFlag_PLINK))) { ++ err = -EIO; ++ IOErr("i%lu exists on a upper branch " ++ "but plink is disabled\n", inode->i_ino); ++ goto out; ++ } ++ ++ if (dst_inode->i_nlink) { ++ hidden_src = lkup_plink(sb, bdst, inode); ++ err = PTR_ERR(hidden_src); ++ if (IS_ERR(hidden_src)) ++ goto out; ++ DEBUG_ON(!hidden_src->d_inode); ++ // vfs_link() does lock the inode ++ err = vfsub_link(hidden_src, hidden_dir, hidden_dst, dlgt); ++ dput(hidden_src); ++ goto out; ++ } else ++ /* udba work */ ++ au_update_brange(inode, 1); ++ } ++ ++ old_ibstart = ibstart(inode); ++ err = cpup_entry(dentry, bdst, bsrc, len, flags, dlgt); ++ if (unlikely(err)) ++ goto out; ++ dst_inode = hidden_dst->d_inode; ++ hi_lock_child2(dst_inode); ++ ++ //todo: test dlgt ++ err = cpup_iattr(hidden_dst, hidden_src, dlgt); ++ //if (LktrCond) err = -1; ++#if 0 // xattr ++ if (0 && !err) ++ err = cpup_xattrs(hidden_src, hidden_dst); ++#endif ++ isdir = S_ISDIR(dst_inode->i_mode); ++ if (!err) { ++ if (bdst < old_ibstart) ++ set_ibstart(inode, bdst); ++ set_h_iptr(inode, bdst, igrab(dst_inode), ++ au_hi_flags(inode, isdir)); ++ i_unlock(dst_inode); ++ src_inode = hidden_src->d_inode; ++ if (!isdir) { ++ if (src_inode->i_nlink > 1 ++ && au_flag_test(sb, AuFlag_PLINK)) ++ append_plink(sb, inode, hidden_dst, bdst); ++ else { ++ /* braces are added to stop a warning */ ++ ;//xino_write0(sb, bsrc, src_inode->i_ino); ++ /* ignore this error */ ++ } ++ } ++ //goto out; /* success */ ++ return 0; /* success */ ++ } ++ ++ /* revert */ ++ i_unlock(dst_inode); ++ parent = dget_parent(dentry); ++ //dtime_store(&dt, parent, h_parent); ++ dtime_store(&dt, parent, hidden_dst->d_parent); ++ dput(parent); ++ if (!isdir) ++ rerr = vfsub_unlink(hidden_dir, hidden_dst, dlgt); ++ else ++ rerr = vfsub_rmdir(hidden_dir, hidden_dst, dlgt); ++ //rerr = -1; ++ dtime_revert(&dt, flags & CPUP_LOCKED_GHDIR); ++ if (rerr) { ++ IOErr("failed removing broken entry(%d, %d)\n", err, rerr); ++ err = -EIO; ++ } ++ ++ out: ++ //dput(h_parent); ++ TraceErr(err); ++ return err; ++} ++ ++struct cpup_single_args { ++ int *errp; ++ struct dentry *dentry; ++ aufs_bindex_t bdst, bsrc; ++ loff_t len; ++ unsigned int flags; ++}; ++ ++static void call_cpup_single(void *args) ++{ ++ struct cpup_single_args *a = args; ++ *a->errp = cpup_single(a->dentry, a->bdst, a->bsrc, a->len, a->flags); ++} ++ ++int sio_cpup_single(struct dentry *dentry, aufs_bindex_t bdst, ++ aufs_bindex_t bsrc, loff_t len, unsigned int flags) ++{ ++ int 
err; ++ struct dentry *hidden_dentry; ++ umode_t mode; ++ ++ LKTRTrace("%.*s, i%lu, bdst %d, bsrc %d, len %Ld, flags 0x%x\n", ++ DLNPair(dentry), dentry->d_inode->i_ino, bdst, bsrc, len, ++ flags); ++ ++ hidden_dentry = au_h_dptr_i(dentry, bsrc); ++ mode = hidden_dentry->d_inode->i_mode & S_IFMT; ++ if ((mode != S_IFCHR && mode != S_IFBLK) ++ || capable(CAP_MKNOD)) ++ err = cpup_single(dentry, bdst, bsrc, len, flags); ++ else { ++ struct cpup_single_args args = { ++ .errp = &err, ++ .dentry = dentry, ++ .bdst = bdst, ++ .bsrc = bsrc, ++ .len = len, ++ .flags = flags ++ }; ++ au_wkq_wait(call_cpup_single, &args, /*dlgt*/0); ++ } ++ ++ TraceErr(err); ++ return err; ++} ++ ++/* ++ * copyup the @dentry from the first active hidden branch to @bdst, ++ * using cpup_single(). ++ */ ++int cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len, ++ unsigned int flags) ++{ ++ int err; ++ struct inode *inode; ++ aufs_bindex_t bsrc, bend; ++ ++ LKTRTrace("%.*s, bdst %d, len %Ld, flags 0x%x\n", ++ DLNPair(dentry), bdst, len, flags); ++ inode = dentry->d_inode; ++ DEBUG_ON(!S_ISDIR(inode->i_mode) && dbstart(dentry) < bdst); ++ ++ bend = dbend(dentry); ++ for (bsrc = bdst + 1; bsrc <= bend; bsrc++) ++ if (au_h_dptr_i(dentry, bsrc)) ++ break; ++ DEBUG_ON(!au_h_dptr_i(dentry, bsrc)); ++ ++ err = lkup_neg(dentry, bdst); ++ //err = -1; ++ if (!err) { ++ err = cpup_single(dentry, bdst, bsrc, len, flags); ++ if (!err) ++ return 0; /* success */ ++ ++ /* revert */ ++ set_h_dptr(dentry, bdst, NULL); ++ set_dbstart(dentry, bsrc); ++ } ++ ++ TraceErr(err); ++ return err; ++} ++ ++struct cpup_simple_args { ++ int *errp; ++ struct dentry *dentry; ++ aufs_bindex_t bdst; ++ loff_t len; ++ unsigned int flags; ++}; ++ ++static void call_cpup_simple(void *args) ++{ ++ struct cpup_simple_args *a = args; ++ *a->errp = cpup_simple(a->dentry, a->bdst, a->len, a->flags); ++} ++ ++int sio_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len, ++ unsigned int flags) ++{ ++ int err, do_sio, dlgt; ++ //struct dentry *parent; ++ struct inode *hidden_dir, *dir; ++ ++ LKTRTrace("%.*s, b%d, len %Ld, flags 0x%x\n", ++ DLNPair(dentry), bdst, len, flags); ++ ++ //parent = dget_parent(dentry); ++ //dir = parent->d_inode; ++ dir = dentry->d_parent->d_inode; ++ hidden_dir = au_h_iptr_i(dir, bdst); ++ dlgt = need_dlgt(dir->i_sb); ++ do_sio = au_test_perm(hidden_dir, MAY_EXEC | MAY_WRITE, dlgt); ++ if (!do_sio) { ++ umode_t mode = dentry->d_inode->i_mode & S_IFMT; ++ do_sio = ((mode == S_IFCHR || mode == S_IFBLK) ++ && !capable(CAP_MKNOD)); ++ } ++ if (!do_sio) ++ err = cpup_simple(dentry, bdst, len, flags); ++ else { ++ struct cpup_simple_args args = { ++ .errp = &err, ++ .dentry = dentry, ++ .bdst = bdst, ++ .len = len, ++ .flags = flags ++ }; ++ au_wkq_wait(call_cpup_simple, &args, /*dlgt*/0); ++ } ++ ++ //dput(parent); ++ TraceErr(err); ++ return err; ++} ++ ++//todo: dcsub ++/* cf. 
revalidate function in file.c */ ++int cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst, struct dentry *locked) ++{ ++ int err; ++ struct super_block *sb; ++ struct dentry *d, *parent, *hidden_parent; ++ unsigned int udba; ++ ++ LKTRTrace("%.*s, b%d, parent i%lu, locked %p\n", ++ DLNPair(dentry), bdst, parent_ino(dentry), locked); ++ sb = dentry->d_sb; ++ DEBUG_ON(test_ro(sb, bdst, NULL)); ++ parent = dentry->d_parent; ++ IiMustWriteLock(parent->d_inode); ++ if (unlikely(IS_ROOT(parent))) ++ return 0; ++ if (locked) { ++ DiMustAnyLock(locked); ++ IiMustAnyLock(locked->d_inode); ++ } ++ ++ /* slow loop, keep it simple and stupid */ ++ err = 0; ++ udba = au_flag_test(sb, AuFlag_UDBA_INOTIFY); ++ while (1) { ++ parent = dentry->d_parent; // dget_parent() ++ hidden_parent = au_h_dptr_i(parent, bdst); ++ if (hidden_parent) ++ return 0; /* success */ ++ ++ /* find top dir which is needed to cpup */ ++ do { ++ d = parent; ++ parent = d->d_parent; // dget_parent() ++ if (parent != locked) ++ di_read_lock_parent3(parent, !AUFS_I_RLOCK); ++ hidden_parent = au_h_dptr_i(parent, bdst); ++ if (parent != locked) ++ di_read_unlock(parent, !AUFS_I_RLOCK); ++ } while (!hidden_parent); ++ ++ if (d != dentry->d_parent) ++ di_write_lock_child3(d); ++ ++ /* somebody else might create while we were sleeping */ ++ if (!au_h_dptr_i(d, bdst) || !au_h_dptr_i(d, bdst)->d_inode) { ++ struct inode *h_dir = hidden_parent->d_inode, ++ *dir = parent->d_inode, ++ *h_gdir, *gdir; ++ ++ if (au_h_dptr_i(d, bdst)) ++ au_update_dbstart(d); ++ //DEBUG_ON(dbstart(d) <= bdst); ++ if (parent != locked) ++ di_read_lock_parent3(parent, AUFS_I_RLOCK); ++ h_gdir = gdir = NULL; ++ if (unlikely(udba && !IS_ROOT(parent))) { ++ gdir = parent->d_parent->d_inode; ++ h_gdir = hidden_parent->d_parent->d_inode; ++ hgdir_lock(h_gdir, gdir, bdst); ++ } ++ hdir_lock(h_dir, dir, bdst); ++ err = sio_cpup_simple(d, bdst, -1, ++ au_flags_cpup(CPUP_DTIME, ++ parent)); ++ //if (LktrCond) err = -1; ++ hdir_unlock(h_dir, dir, bdst); ++ if (unlikely(gdir)) ++ hdir_unlock(h_gdir, gdir, bdst); ++ if (parent != locked) ++ di_read_unlock(parent, AUFS_I_RLOCK); ++ } ++ ++ if (d != dentry->d_parent) ++ di_write_unlock(d); ++ if (unlikely(err)) ++ break; ++ } ++ ++// out: ++ TraceErr(err); ++ return err; ++} ++ ++int test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst, ++ struct dentry *locked) ++{ ++ int err; ++ struct dentry *parent; ++ struct inode *dir; ++ ++ parent = dentry->d_parent; ++ dir = parent->d_inode; ++ LKTRTrace("%.*s, b%d, parent i%lu, locked %p\n", ++ DLNPair(dentry), bdst, dir->i_ino, locked); ++ DiMustReadLock(parent); ++ IiMustReadLock(dir); ++ ++ if (au_h_iptr_i(dir, bdst)) ++ return 0; ++ ++ err = 0; ++ di_read_unlock(parent, AUFS_I_RLOCK); ++ di_write_lock_parent(parent); ++ if (au_h_iptr_i(dir, bdst)) ++ goto out; ++ ++ err = cpup_dirs(dentry, bdst, locked); ++ ++ out: ++ di_downgrade_lock(parent, AUFS_I_RLOCK); ++ TraceErr(err); ++ return err; ++} +diff --git a/fs/aufs/cpup.h b/fs/aufs/cpup.h +new file mode 100755 +index 0000000..86557aa +--- /dev/null ++++ b/fs/aufs/cpup.h +@@ -0,0 +1,72 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: cpup.h,v 1.15 2007/05/14 03:41:52 sfjro Exp $ */ ++ ++#ifndef __AUFS_CPUP_H__ ++#define __AUFS_CPUP_H__ ++ ++#ifdef __KERNEL__ ++ ++#include <linux/fs.h> ++#include <linux/version.h> ++#include <linux/aufs_type.h> ++ ++static inline ++void au_cpup_attr_blksize(struct inode *inode, struct inode *h_inode) ++{ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) ++ inode->i_blksize = h_inode->i_blksize; ++#endif ++} ++ ++void au_cpup_attr_timesizes(struct inode *inode); ++void au_cpup_attr_nlink(struct inode *inode); ++void au_cpup_attr_changable(struct inode *inode); ++void au_cpup_igen(struct inode *inode, struct inode *h_inode); ++void au_cpup_attr_all(struct inode *inode); ++ ++#define CPUP_DTIME 1 // do dtime_store/revert ++// todo: remove this ++#define CPUP_LOCKED_GHDIR 2 // grand parent hidden dir is locked ++unsigned int au_flags_cpup(unsigned int init, struct dentry *parent); ++ ++int cpup_single(struct dentry *dentry, aufs_bindex_t bdst, aufs_bindex_t bsrc, ++ loff_t len, unsigned int flags); ++int sio_cpup_single(struct dentry *dentry, aufs_bindex_t bdst, ++ aufs_bindex_t bsrc, loff_t len, unsigned int flags); ++int cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len, ++ unsigned int flags); ++int sio_cpup_simple(struct dentry *dentry, aufs_bindex_t bdst, loff_t len, ++ unsigned int flags); ++ ++int cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst, struct dentry *locked); ++int test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst, ++ struct dentry *locked); ++ ++/* keep timestamps when copyup */ ++struct dtime { ++ struct dentry *dt_dentry, *dt_h_dentry; ++ struct timespec dt_atime, dt_mtime; ++}; ++void dtime_store(struct dtime *dt, struct dentry *dentry, ++ struct dentry *h_dentry); ++void dtime_revert(struct dtime *dt, int h_parent_is_locked); ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_CPUP_H__ */ +diff --git a/fs/aufs/dcsub.c b/fs/aufs/dcsub.c +new file mode 100755 +index 0000000..6ec29d3 +--- /dev/null ++++ b/fs/aufs/dcsub.c +@@ -0,0 +1,175 @@ ++/* ++ * Copyright (C) 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: dcsub.c,v 1.3 2007/05/14 03:41:52 sfjro Exp $ */ ++ ++#include "aufs.h" ++ ++static void au_dpage_free(struct au_dpage *dpage) ++{ ++ int i; ++ ++ TraceEnter(); ++ DEBUG_ON(!dpage); ++ ++ for (i = 0; i < dpage->ndentry; i++) ++ dput(dpage->dentries[i]); ++ free_page((unsigned long)dpage->dentries); ++} ++ ++int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp) ++{ ++ int err; ++ void *p; ++ ++ TraceEnter(); ++ ++ err = -ENOMEM; ++ dpages->dpages = kmalloc(sizeof(*dpages->dpages), gfp); ++ if (unlikely(!dpages->dpages)) ++ goto out; ++ p = (void*)__get_free_page(gfp); ++ if (unlikely(!p)) ++ goto out_dpages; ++ dpages->dpages[0].ndentry = 0; ++ dpages->dpages[0].dentries = p; ++ dpages->ndpage = 1; ++ return 0; /* success */ ++ ++ out_dpages: ++ kfree(dpages->dpages); ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++void au_dpages_free(struct au_dcsub_pages *dpages) ++{ ++ int i; ++ ++ TraceEnter(); ++ ++ for (i = 0; i < dpages->ndpage; i++) ++ au_dpage_free(dpages->dpages + i); ++ kfree(dpages->dpages); ++} ++ ++static int au_dpages_append(struct au_dcsub_pages *dpages, ++ struct dentry *dentry, gfp_t gfp) ++{ ++ int err, sz; ++ struct au_dpage *dpage; ++ void *p; ++ ++ //TraceEnter(); ++ ++ dpage = dpages->dpages + dpages->ndpage - 1; ++ DEBUG_ON(!dpage); ++ sz = PAGE_SIZE/sizeof(dentry); ++ if (unlikely(dpage->ndentry >= sz)) { ++ LKTRLabel(new dpage); ++ err = -ENOMEM; ++ sz = dpages->ndpage * sizeof(*dpages->dpages); ++ p = au_kzrealloc(dpages->dpages, sz, ++ sz + sizeof(*dpages->dpages), gfp); ++ if (unlikely(!p)) ++ goto out; ++ dpage = dpages->dpages + dpages->ndpage; ++ p = (void*)__get_free_page(gfp); ++ if (unlikely(!p)) ++ goto out; ++ dpage->ndentry = 0; ++ dpage->dentries = p; ++ dpages->ndpage++; ++ } ++ ++ dpage->dentries[dpage->ndentry++] = dget(dentry); ++ return 0; /* success */ ++ ++ out: ++ //TraceErr(err); ++ return err; ++} ++ ++int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root, ++ au_dpages_test test, void *arg) ++{ ++ int err; ++ struct dentry *this_parent = root; ++ struct list_head *next; ++ struct super_block *sb = root->d_sb; ++ ++ TraceEnter(); ++ ++ err = 0; ++ spin_lock(&dcache_lock); ++ repeat: ++ next = this_parent->d_subdirs.next; ++ resume: ++ if (this_parent->d_sb == sb ++ && !IS_ROOT(this_parent) ++ && atomic_read(&this_parent->d_count) ++ && this_parent->d_inode ++ && (!test || test(this_parent, arg))) { ++ err = au_dpages_append(dpages, this_parent, GFP_ATOMIC); ++ if (unlikely(err)) ++ goto out; ++ } ++ ++ while (next != &this_parent->d_subdirs) { ++ struct list_head *tmp = next; ++ struct dentry *dentry = list_entry(tmp, struct dentry, D_CHILD); ++ next = tmp->next; ++ if (unlikely(/*d_unhashed(dentry) || */!dentry->d_inode)) ++ continue; ++ if (!list_empty(&dentry->d_subdirs)) { ++ this_parent = dentry; ++ goto repeat; ++ } ++ if (dentry->d_sb == sb ++ && atomic_read(&dentry->d_count) ++ && (!test || test(dentry, arg))) { ++ err = au_dpages_append(dpages, dentry, GFP_ATOMIC); ++ if (unlikely(err)) ++ goto out; ++ } ++ } ++ ++ if (this_parent != root) { ++ next = this_parent->D_CHILD.next; ++ this_parent = this_parent->d_parent; ++ goto resume; ++ } ++ out: ++ spin_unlock(&dcache_lock); ++#if 0 ++ if (!err) { ++ int i, j; ++ j = 0; ++ for (i = 0; i < dpages->ndpage; i++) { ++ if 
((dpages->dpages + i)->ndentry) ++ Dbg("%d: %d\n", i, (dpages->dpages + i)->ndentry); ++ j += (dpages->dpages + i)->ndentry; ++ } ++ if (j) ++ Dbg("ndpage %d, %d\n", dpages->ndpage, j); ++ } ++#endif ++ TraceErr(err); ++ return err; ++} +diff --git a/fs/aufs/dcsub.h b/fs/aufs/dcsub.h +new file mode 100755 +index 0000000..0ba034b +--- /dev/null ++++ b/fs/aufs/dcsub.h +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (C) 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: dcsub.h,v 1.2 2007/05/14 03:41:52 sfjro Exp $ */ ++ ++#ifndef __AUFS_DCSUB_H__ ++#define __AUFS_DCSUB_H__ ++ ++#ifdef __KERNEL__ ++ ++#include <linux/dcache.h> ++ ++struct au_dpage { ++ int ndentry; ++ struct dentry **dentries; ++}; ++ ++struct au_dcsub_pages { ++ int ndpage; ++ struct au_dpage *dpages; ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp); ++void au_dpages_free(struct au_dcsub_pages *dpages); ++typedef int (*au_dpages_test)(struct dentry *dentry, void *arg); ++int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root, ++ au_dpages_test test, void *arg); ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_DCSUB_H__ */ +diff --git a/fs/aufs/debug.c b/fs/aufs/debug.c +new file mode 100755 +index 0000000..99d158b +--- /dev/null ++++ b/fs/aufs/debug.c +@@ -0,0 +1,262 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: debug.c,v 1.27 2007/04/30 05:48:23 sfjro Exp $ */ ++ ++#include "aufs.h" ++ ++atomic_t aufs_cond = ATOMIC_INIT(0); ++ ++#if defined(CONFIG_LKTR) || defined(CONFIG_LKTR_MODULE) ++#define dpri(fmt, arg...) \ ++ do {if (LktrCond) printk(KERN_DEBUG fmt, ##arg);} while (0) ++#else ++#define dpri(fmt, arg...) 
printk(KERN_DEBUG fmt, ##arg) ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_dpri_whlist(struct aufs_nhash *whlist) ++{ ++ int i; ++ struct hlist_head *head; ++ struct aufs_wh *tpos; ++ struct hlist_node *pos; ++ ++ for (i = 0; i < AUFS_NHASH_SIZE; i++) { ++ head = whlist->heads + i; ++ hlist_for_each_entry(tpos, pos, head, wh_hash) ++ dpri("b%d, %.*s, %d\n", ++ tpos->wh_bindex, ++ tpos->wh_str.len, tpos->wh_str.name, ++ tpos->wh_str.len); ++ } ++} ++ ++void au_dpri_vdir(struct aufs_vdir *vdir) ++{ ++ int i; ++ union aufs_deblk_p p; ++ unsigned char *o; ++ ++ if (!vdir || IS_ERR(vdir)) { ++ dpri("err %ld\n", PTR_ERR(vdir)); ++ return; ++ } ++ ++ dpri("nblk %d, deblk %p %d, last{%d, %p}, ver %lu\n", ++ vdir->vd_nblk, vdir->vd_deblk, ksize(vdir->vd_deblk), ++ vdir->vd_last.i, vdir->vd_last.p.p, vdir->vd_version); ++ for (i = 0; i < vdir->vd_nblk; i++) { ++ p.deblk = vdir->vd_deblk[i]; ++ o = p.p; ++ dpri("[%d]: %p %d\n", i, o, ksize(o)); ++#if 0 // verbose ++ int j; ++ for (j = 0; j < 8; j++) { ++ dpri("%p(+%d) {%02x %02x %02x %02x %02x %02x %02x %02x " ++ "%02x %02x %02x %02x %02x %02x %02x %02x}\n", ++ p.p, p.p - o, ++ p.p[0], p.p[1], p.p[2], p.p[3], ++ p.p[4], p.p[5], p.p[6], p.p[7], ++ p.p[8], p.p[9], p.p[10], p.p[11], ++ p.p[12], p.p[13], p.p[14], p.p[15]); ++ p.p += 16; ++ } ++#endif ++ } ++} ++ ++static int do_pri_inode(aufs_bindex_t bindex, struct inode *inode) ++{ ++ if (!inode || IS_ERR(inode)) { ++ dpri("i%d: err %ld\n", bindex, PTR_ERR(inode)); ++ return -1; ++ } ++ ++ /* the type of i_blocks depends upon CONFIG_LSF */ ++ BUILD_BUG_ON(sizeof(inode->i_blocks) != sizeof(unsigned long) ++ && sizeof(inode->i_blocks) != sizeof(u64)); ++ dpri("i%d: i%lu, %s, cnt %d, nl %u, 0%o, sz %Lu, blk %Lu," ++ " ct %Ld, np %lu, st 0x%lx, g %x\n", ++ bindex, ++ inode->i_ino, inode->i_sb ? au_sbtype(inode->i_sb) : "??", ++ atomic_read(&inode->i_count), inode->i_nlink, inode->i_mode, ++ i_size_read(inode), (u64)inode->i_blocks, ++ timespec_to_ns(&inode->i_ctime) & 0x0ffff, ++ inode->i_mapping ? inode->i_mapping->nrpages : 0, ++ inode->i_state, inode->i_generation); ++ return 0; ++} ++ ++void au_dpri_inode(struct inode *inode) ++{ ++ struct aufs_iinfo *iinfo; ++ aufs_bindex_t bindex; ++ int err; ++ ++ err = do_pri_inode(-1, inode); ++ if (err || !au_is_aufs(inode->i_sb)) ++ return; ++ ++ iinfo = itoii(inode); ++ if (!iinfo) ++ return; ++ dpri("i-1: bstart %d, bend %d, gen %d\n", ++ iinfo->ii_bstart, iinfo->ii_bend, au_iigen(inode)); ++ if (iinfo->ii_bstart < 0) ++ return; ++ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; bindex++) ++ do_pri_inode(bindex, iinfo->ii_hinode[0 + bindex].hi_inode); ++} ++ ++static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry) ++{ ++ if (!dentry || IS_ERR(dentry)) { ++ dpri("d%d: err %ld\n", bindex, PTR_ERR(dentry)); ++ return -1; ++ } ++ dpri("d%d: %.*s/%.*s, %s, cnt %d, flags 0x%x\n", ++ bindex, ++ DLNPair(dentry->d_parent), DLNPair(dentry), ++ dentry->d_sb ? 
au_sbtype(dentry->d_sb) : "??", ++ atomic_read(&dentry->d_count), dentry->d_flags); ++ do_pri_inode(bindex, dentry->d_inode); ++ return 0; ++} ++ ++void au_dpri_dentry(struct dentry *dentry) ++{ ++ struct aufs_dinfo *dinfo; ++ aufs_bindex_t bindex; ++ int err; ++ ++ err = do_pri_dentry(-1, dentry); ++ if (err || !au_is_aufs(dentry->d_sb)) ++ return; ++ ++ dinfo = dtodi(dentry); ++ if (!dinfo) ++ return; ++ dpri("d-1: bstart %d, bend %d, bwh %d, bdiropq %d, gen %d\n", ++ dinfo->di_bstart, dinfo->di_bend, ++ dinfo->di_bwh, dinfo->di_bdiropq, au_digen(dentry)); ++ if (dinfo->di_bstart < 0) ++ return; ++ for (bindex = dinfo->di_bstart; bindex <= dinfo->di_bend; bindex++) ++ do_pri_dentry(bindex, dinfo->di_hdentry[0 + bindex].hd_dentry); ++} ++ ++static int do_pri_file(aufs_bindex_t bindex, struct file *file) ++{ ++ char a[32]; ++ ++ if (!file || IS_ERR(file)) { ++ dpri("f%d: err %ld\n", bindex, PTR_ERR(file)); ++ return -1; ++ } ++ a[0] = 0; ++ if (bindex == -1 && ftofi(file)) ++ snprintf(a, sizeof(a), ", mmapped %d", au_is_mmapped(file)); ++ dpri("f%d: mode 0x%x, flags 0%o, cnt %d, pos %Lu%s\n", ++ bindex, file->f_mode, file->f_flags, file_count(file), ++ file->f_pos, a); ++ do_pri_dentry(bindex, file->f_dentry); ++ return 0; ++} ++ ++void au_dpri_file(struct file *file) ++{ ++ struct aufs_finfo *finfo; ++ aufs_bindex_t bindex; ++ int err; ++ ++ err = do_pri_file(-1, file); ++ if (err || !file->f_dentry || !au_is_aufs(file->f_dentry->d_sb)) ++ return; ++ ++ finfo = ftofi(file); ++ if (!finfo) ++ return; ++ if (finfo->fi_bstart < 0) ++ return; ++ for (bindex = finfo->fi_bstart; bindex <= finfo->fi_bend; bindex++) { ++ struct aufs_hfile *hf; ++ //dpri("bindex %d\n", bindex); ++ hf = finfo->fi_hfile + bindex; ++ do_pri_file(bindex, hf ? hf->hf_file : NULL); ++ } ++} ++ ++static int do_pri_br(aufs_bindex_t bindex, struct aufs_branch *br) ++{ ++ struct vfsmount *mnt; ++ struct super_block *sb; ++ ++ if (!br || IS_ERR(br) ++ || !(mnt = br->br_mnt) || IS_ERR(mnt) ++ || !(sb = mnt->mnt_sb) || IS_ERR(sb)) { ++ dpri("s%d: err %ld\n", bindex, PTR_ERR(br)); ++ return -1; ++ } ++ ++ dpri("s%d: {perm 0x%x, cnt %d}, " ++ "%s, flags 0x%lx, cnt(BIAS) %d, active %d, xino %p %p\n", ++ bindex, br->br_perm, br_count(br), ++ au_sbtype(sb), sb->s_flags, sb->s_count - S_BIAS, ++ atomic_read(&sb->s_active), br->br_xino, ++ br->br_xino ? 
br->br_xino->f_dentry : NULL); ++ return 0; ++} ++ ++void au_dpri_sb(struct super_block *sb) ++{ ++ struct aufs_sbinfo *sbinfo; ++ aufs_bindex_t bindex; ++ int err; ++ struct vfsmount mnt = {.mnt_sb = sb}; ++ struct aufs_branch fake = { ++ .br_perm = 0, ++ .br_mnt = &mnt, ++ .br_count = ATOMIC_INIT(0), ++ .br_xino = NULL ++ }; ++ ++ atomic_set(&fake.br_count, 0); ++ err = do_pri_br(-1, &fake); ++ dpri("dev 0x%x\n", sb->s_dev); ++ if (err || !au_is_aufs(sb)) ++ return; ++ ++ sbinfo = stosi(sb); ++ if (!sbinfo) ++ return; ++ for (bindex = 0; bindex <= sbinfo->si_bend; bindex++) { ++ //dpri("bindex %d\n", bindex); ++ do_pri_br(bindex, sbinfo->si_branch[0 + bindex]); ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void DbgSleep(int sec) ++{ ++ static DECLARE_WAIT_QUEUE_HEAD(wq); ++ Dbg("sleep %d sec\n", sec); ++ wait_event_timeout(wq, 0, sec * HZ); ++} +diff --git a/fs/aufs/debug.h b/fs/aufs/debug.h +new file mode 100755 +index 0000000..53f5f6a +--- /dev/null ++++ b/fs/aufs/debug.h +@@ -0,0 +1,129 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: debug.h,v 1.31 2007/05/14 03:41:52 sfjro Exp $ */ ++ ++#ifndef __AUFS_DEBUG_H__ ++#define __AUFS_DEBUG_H__ ++ ++#ifdef __KERNEL__ ++ ++#include <linux/fs.h> ++ ++#ifdef CONFIG_AUFS_DEBUG ++#define DEBUG_ON(a) BUG_ON(a) ++extern atomic_t aufs_cond; ++#define au_debug_on() atomic_inc(&aufs_cond) ++#define au_debug_off() atomic_dec(&aufs_cond) ++#define au_is_debug() atomic_read(&aufs_cond) ++#else ++#define DEBUG_ON(a) /* */ ++#define au_debug_on() /* */ ++#define au_debug_off() /* */ ++#define au_is_debug() 0 ++#endif ++ ++#define MtxMustLock(mtx) DEBUG_ON(!mutex_is_locked(mtx)) ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* debug print */ ++#if defined(CONFIG_LKTR) || defined(CONFIG_LKTR_MODULE) ++#include <linux/lktr.h> ++#ifdef CONFIG_AUFS_DEBUG ++#undef LktrCond ++#define LktrCond unlikely((lktr_cond && lktr_cond()) || au_is_debug()) ++#endif ++#else ++#define LktrCond au_is_debug() ++#define LKTRDumpVma(pre, vma, suf) /* */ ++#define LKTRDumpStack() /* */ ++#define LKTRTrace(fmt, args...) 
do { \ ++ if (LktrCond) \ ++ Dbg(fmt, ##args); \ ++} while (0) ++#define LKTRLabel(label) LKTRTrace("%s\n", #label) ++#endif /* CONFIG_LKTR */ ++ ++#define TraceErr(e) do { \ ++ if (unlikely((e) < 0)) \ ++ LKTRTrace("err %d\n", (int)(e)); \ ++} while (0) ++#define TraceErrPtr(p) do { \ ++ if (IS_ERR(p)) \ ++ LKTRTrace("err %ld\n", PTR_ERR(p)); \ ++} while (0) ++#define TraceEnter() LKTRLabel(enter) ++ ++/* dirty macros for debug print, use with "%.*s" and caution */ ++#define LNPair(qstr) (qstr)->len,(qstr)->name ++#define DLNPair(d) LNPair(&(d)->d_name) ++ ++/* ---------------------------------------------------------------------- */ ++ ++#define Dpri(lvl, fmt, arg...) \ ++ printk(lvl AUFS_NAME " %s:%d:%s[%d]: " fmt, \ ++ __func__, __LINE__, current->comm, current->pid, ##arg) ++#define Dbg(fmt, arg...) Dpri(KERN_DEBUG, fmt, ##arg) ++#define Warn(fmt, arg...) Dpri(KERN_WARNING, fmt, ##arg) ++#define Warn1(fmt, arg...) do { \ ++ static unsigned char c; \ ++ if (!c++) Warn(fmt, ##arg); \ ++ } while (0) ++#define Err(fmt, arg...) Dpri(KERN_ERR, fmt, ##arg) ++#define Err1(fmt, arg...) do { \ ++ static unsigned char c; \ ++ if (!c++) Err(fmt, ##arg); \ ++ } while (0) ++#define IOErr(fmt, arg...) Err("I/O Error, " fmt, ##arg) ++#define IOErr1(fmt, arg...) do { \ ++ static unsigned char c; \ ++ if (!c++) IOErr(fmt, ##arg); \ ++ } while (0) ++#define IOErrWhck(fmt, arg...) Err("I/O Error, try whck. " fmt, ##arg) ++ ++/* ---------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_AUFS_DEBUG ++struct aufs_nhash; ++void au_dpri_whlist(struct aufs_nhash *whlist); ++struct aufs_vdir; ++void au_dpri_vdir(struct aufs_vdir *vdir); ++void au_dpri_inode(struct inode *inode); ++void au_dpri_dentry(struct dentry *dentry); ++void au_dpri_file(struct file *filp); ++void au_dpri_sb(struct super_block *sb); ++#define DbgWhlist(w) do{LKTRTrace(#w "\n"); au_dpri_whlist(w);}while(0) ++#define DbgVdir(v) do{LKTRTrace(#v "\n"); au_dpri_vdir(v);}while(0) ++#define DbgInode(i) do{LKTRTrace(#i "\n"); au_dpri_inode(i);}while(0) ++#define DbgDentry(d) do{LKTRTrace(#d "\n"); au_dpri_dentry(d);}while(0) ++#define DbgFile(f) do{LKTRTrace(#f "\n"); au_dpri_file(f);}while(0) ++#define DbgSb(sb) do{LKTRTrace(#sb "\n"); au_dpri_sb(sb);}while(0) ++void DbgSleep(int sec); ++#else ++#define DbgWhlist(w) /* */ ++#define DbgVdir(v) /* */ ++#define DbgInode(i) /* */ ++#define DbgDentry(d) /* */ ++#define DbgFile(f) /* */ ++#define DbgSb(sb) /* */ ++#define DbgSleep(sec) /* */ ++#endif ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_DEBUG_H__ */ +diff --git a/fs/aufs/dentry.c b/fs/aufs/dentry.c +new file mode 100755 +index 0000000..2acb89b +--- /dev/null ++++ b/fs/aufs/dentry.c +@@ -0,0 +1,946 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: dentry.c,v 1.41 2007/05/14 03:38:38 sfjro Exp $ */ ++ ++//#include <linux/fs.h> ++//#include <linux/namei.h> ++#include "aufs.h" ++ ++#ifdef CONFIG_AUFS_LHASH_PATCH ++ ++#ifdef CONFIG_AUFS_DLGT ++struct lookup_hash_args { ++ struct dentry **errp; ++ struct qstr *name; ++ struct dentry *base; ++ struct nameidata *nd; ++}; ++ ++static void call_lookup_hash(void *args) ++{ ++ struct lookup_hash_args *a = args; ++ *a->errp = __lookup_hash(a->name, a->base, a->nd); ++} ++#endif /* CONFIG_AUFS_DLGT */ ++ ++static struct dentry *lkup_hash(const char *name, struct dentry *parent, ++ int len, struct lkup_args *lkup) ++{ ++ struct dentry *dentry; ++ char *p; ++ unsigned long hash; ++ struct qstr this; ++ unsigned int c; ++ struct nameidata tmp_nd; ++ ++ dentry = ERR_PTR(-EACCES); ++ this.name = name; ++ this.len = len; ++ if (unlikely(!len)) ++ goto out; ++ ++ p = (void*)name; ++ hash = init_name_hash(); ++ while (len--) { ++ c = *p++; ++ if (unlikely(c == '/' || c == '\0')) ++ goto out; ++ hash = partial_name_hash(c, hash); ++ } ++ this.hash = end_name_hash(hash); ++ ++ memset(&tmp_nd, 0, sizeof(tmp_nd)); ++ tmp_nd.dentry = dget(parent); ++ tmp_nd.mnt = mntget(lkup->nfsmnt); ++#ifndef CONFIG_AUFS_DLGT ++ dentry = __lookup_hash(&this, parent, &tmp_nd); ++#else ++ if (!lkup->dlgt) ++ dentry = __lookup_hash(&this, parent, &tmp_nd); ++ else { ++ struct lookup_hash_args args = { ++ .errp = &dentry, ++ .name = &this, ++ .base = parent, ++ .nd = &tmp_nd ++ }; ++ au_wkq_wait(call_lookup_hash, &args, /*dlgt*/1); ++ } ++#endif ++ path_release(&tmp_nd); ++ ++ out: ++ TraceErrPtr(dentry); ++ return dentry; ++} ++#elif defined(CONFIG_AUFS_DLGT) ++static struct dentry *lkup_hash(const char *name, struct dentry *parent, ++ int len, struct lkup_args *lkup) ++{ ++ return ERR_PTR(-ENOSYS); ++} ++#endif ++ ++#ifdef CONFIG_AUFS_DLGT ++struct lookup_one_len_args { ++ struct dentry **errp; ++ const char *name; ++ struct dentry *parent; ++ int len; ++}; ++ ++static void call_lookup_one_len(void *args) ++{ ++ struct lookup_one_len_args *a = args; ++ *a->errp = lookup_one_len(a->name, a->parent, a->len); ++} ++#endif /* CONFIG_AUFS_DLGT */ ++ ++#if defined(CONFIG_AUFS_LHASH_PATCH) || defined(CONFIG_AUFS_DLGT) ++/* cf. 
lookup_one_len() in linux/fs/namei.c */ ++struct dentry *lkup_one(const char *name, struct dentry *parent, int len, ++ struct lkup_args *lkup) ++{ ++ struct dentry *dentry; ++ ++ LKTRTrace("%.*s/%.*s, lkup{%p, %d}\n", ++ DLNPair(parent), len, name, lkup->nfsmnt, lkup->dlgt); ++ ++ if (!lkup->nfsmnt) { ++#ifndef CONFIG_AUFS_DLGT ++ dentry = lookup_one_len(name, parent, len); ++#else ++ if (!lkup->dlgt) ++ dentry = lookup_one_len(name, parent, len); ++ else { ++ struct lookup_one_len_args args = { ++ .errp = &dentry, ++ .name = name, ++ .parent = parent, ++ .len = len ++ }; ++ au_wkq_wait(call_lookup_one_len, &args, /*dlgt*/1); ++ } ++#endif ++ } else ++ dentry = lkup_hash(name, parent, len, lkup); ++ ++ TraceErrPtr(dentry); ++ return dentry; ++} ++#endif ++ ++struct lkup_one_args { ++ struct dentry **errp; ++ const char *name; ++ struct dentry *parent; ++ int len; ++ struct lkup_args *lkup; ++}; ++ ++static void call_lkup_one(void *args) ++{ ++ struct lkup_one_args *a = args; ++ *a->errp = lkup_one(a->name, a->parent, a->len, a->lkup); ++} ++ ++/* ++ * returns positive/negative dentry, NULL or an error. ++ * NULL means whiteout-ed or not-found. ++ */ ++static struct dentry *do_lookup(struct dentry *hidden_parent, ++ struct dentry *dentry, aufs_bindex_t bindex, ++ struct qstr *wh_name, int allow_neg, ++ mode_t type, int dlgt) ++{ ++ struct dentry *hidden_dentry; ++ int wh_found, wh_able, opq; ++ struct inode *hidden_dir, *hidden_inode; ++ struct qstr *name; ++ struct super_block *sb; ++ struct lkup_args lkup = {.dlgt = dlgt}; ++ ++ LKTRTrace("%.*s/%.*s, b%d, allow_neg %d, type 0%o, dlgt %d\n", ++ DLNPair(hidden_parent), DLNPair(dentry), bindex, allow_neg, ++ type, dlgt); ++ DEBUG_ON(IS_ROOT(dentry)); ++ hidden_dir = hidden_parent->d_inode; ++ IMustLock(hidden_dir); ++ ++ wh_found = 0; ++ sb = dentry->d_sb; ++ wh_able = sbr_is_whable(sb, bindex); ++ lkup.nfsmnt = au_nfsmnt(sb, bindex); ++ name = &dentry->d_name; ++ if (unlikely(wh_able)) { ++#if 0 //def CONFIG_AUFS_ROBR ++ if (strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) ++ wh_found = is_wh(hidden_parent, wh_name, /*try_sio*/0, ++ &lkup); ++ else ++ wh_found = -EPERM; ++#else ++ wh_found = is_wh(hidden_parent, wh_name, /*try_sio*/0, &lkup); ++#endif ++ } ++ //if (LktrCond) wh_found = -1; ++ hidden_dentry = ERR_PTR(wh_found); ++ if (!wh_found) ++ goto real_lookup; ++ if (unlikely(wh_found < 0)) ++ goto out; ++ ++ /* We found a whiteout */ ++ //set_dbend(dentry, bindex); ++ set_dbwh(dentry, bindex); ++ if (!allow_neg) ++ return NULL; /* success */ ++ ++ real_lookup: ++ // do not superio. 
++ hidden_dentry = lkup_one(name->name, hidden_parent, name->len, &lkup); ++ //if (LktrCond) {dput(hidden_dentry); hidden_dentry = ERR_PTR(-1);} ++ if (IS_ERR(hidden_dentry)) ++ goto out; ++ DEBUG_ON(d_unhashed(hidden_dentry)); ++ hidden_inode = hidden_dentry->d_inode; ++ if (!hidden_inode) { ++ if (!allow_neg) ++ goto out_neg; ++ } else if (wh_found ++ || (type && type != (hidden_inode->i_mode & S_IFMT))) ++ goto out_neg; ++ ++ if (dbend(dentry) <= bindex) ++ set_dbend(dentry, bindex); ++ if (dbstart(dentry) == -1 || bindex < dbstart(dentry)) ++ set_dbstart(dentry, bindex); ++ set_h_dptr(dentry, bindex, hidden_dentry); ++ ++ if (!hidden_inode || !S_ISDIR(hidden_inode->i_mode) || !wh_able) ++ return hidden_dentry; /* success */ ++ ++ hi_lock_child(hidden_inode); ++ opq = is_diropq(hidden_dentry, &lkup); ++ //if (LktrCond) opq = -1; ++ i_unlock(hidden_inode); ++ if (opq > 0) ++ set_dbdiropq(dentry, bindex); ++ else if (unlikely(opq < 0)) { ++ set_h_dptr(dentry, bindex, NULL); ++ hidden_dentry = ERR_PTR(opq); ++ } ++ goto out; ++ ++ out_neg: ++ dput(hidden_dentry); ++ hidden_dentry = NULL; ++ out: ++ TraceErrPtr(hidden_dentry); ++ return hidden_dentry; ++} ++ ++/* ++ * returns the number of hidden positive dentries, ++ * otherwise an error. ++ */ ++int lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type) ++{ ++ int npositive, err, allow_neg, dlgt; ++ struct dentry *parent; ++ aufs_bindex_t bindex, btail; ++ const struct qstr *name = &dentry->d_name; ++ struct qstr whname; ++ struct super_block *sb; ++ ++ LKTRTrace("%.*s, b%d, type 0%o\n", LNPair(name), bstart, type); ++ DEBUG_ON(bstart < 0 || IS_ROOT(dentry)); ++ parent = dget_parent(dentry); ++ ++#if 1 //ndef CONFIG_AUFS_ROBR ++ err = -EPERM; ++ if (unlikely(!strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))) ++ goto out; ++#endif ++ ++ err = au_alloc_whname(name->name, name->len, &whname); ++ //if (LktrCond) {au_free_whname(&whname); err = -1;} ++ if (unlikely(err)) ++ goto out; ++ ++ sb = dentry->d_sb; ++ dlgt = need_dlgt(sb); ++ allow_neg = !type; ++ npositive = 0; ++ btail = dbtaildir(parent); ++ for (bindex = bstart; bindex <= btail; bindex++) { ++ struct dentry *hidden_parent, *hidden_dentry; ++ struct inode *hidden_inode; ++ struct inode *hidden_dir; ++ ++ hidden_dentry = au_h_dptr_i(dentry, bindex); ++ if (hidden_dentry) { ++ if (hidden_dentry->d_inode) ++ npositive++; ++ if (type != S_IFDIR) ++ break; ++ continue; ++ } ++ hidden_parent = au_h_dptr_i(parent, bindex); ++ if (!hidden_parent) ++ continue; ++ hidden_dir = hidden_parent->d_inode; ++ if (!hidden_dir || !S_ISDIR(hidden_dir->i_mode)) ++ continue; ++ ++ hi_lock_parent(hidden_dir); ++ hidden_dentry = do_lookup(hidden_parent, dentry, bindex, ++ &whname, allow_neg, type, dlgt); ++ // do not dput for testing ++ //if (LktrCond) {hidden_dentry = ERR_PTR(-1);} ++ i_unlock(hidden_dir); ++ err = PTR_ERR(hidden_dentry); ++ if (IS_ERR(hidden_dentry)) ++ goto out_wh; ++ allow_neg = 0; ++ ++ if (dbwh(dentry) != -1) ++ break; ++ if (!hidden_dentry) ++ continue; ++ hidden_inode = hidden_dentry->d_inode; ++ if (!hidden_inode) ++ continue; ++ npositive++; ++ if (!type) ++ type = hidden_inode->i_mode & S_IFMT; ++ if (type != S_IFDIR) ++ break; ++ else if (dbdiropq(dentry) != -1) ++ break; ++ } ++ ++ if (npositive) { ++ LKTRLabel(positive); ++ au_update_dbstart(dentry); ++ } ++ err = npositive; ++ ++ out_wh: ++ au_free_whname(&whname); ++ out: ++ dput(parent); ++ TraceErr(err); ++ return err; ++} ++ ++struct dentry *sio_lkup_one(const char *name, struct dentry *parent, int 
len, ++ struct lkup_args *lkup) ++{ ++ struct dentry *dentry; ++ ++ LKTRTrace("%.*s/%.*s\n", DLNPair(parent), len, name); ++ IMustLock(parent->d_inode); ++ ++ if (!au_test_perm(parent->d_inode, MAY_EXEC, lkup->dlgt)) ++ dentry = lkup_one(name, parent, len, lkup); ++ else { ++ // ugly ++ int dlgt = lkup->dlgt; ++ struct lkup_one_args args = { ++ .errp = &dentry, ++ .name = name, ++ .parent = parent, ++ .len = len, ++ .lkup = lkup ++ }; ++ ++ lkup->dlgt = 0; ++ au_wkq_wait(call_lkup_one, &args, /*dlgt*/0); ++ lkup->dlgt = dlgt; ++ } ++ ++ TraceErrPtr(dentry); ++ return dentry; ++} ++ ++/* ++ * lookup @dentry on @bindex which should be negative. ++ */ ++int lkup_neg(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ int err; ++ struct dentry *parent, *hidden_parent, *hidden_dentry; ++ struct inode *hidden_dir; ++ struct lkup_args lkup; ++ ++ LKTRTrace("%.*s, b%d\n", DLNPair(dentry), bindex); ++ parent = dget_parent(dentry); ++ DEBUG_ON(!parent || !parent->d_inode ++ || !S_ISDIR(parent->d_inode->i_mode)); ++ hidden_parent = au_h_dptr_i(parent, bindex); ++ DEBUG_ON(!hidden_parent); ++ hidden_dir = hidden_parent->d_inode; ++ DEBUG_ON(!hidden_dir || !S_ISDIR(hidden_dir->i_mode)); ++ IMustLock(hidden_dir); ++ ++ lkup.nfsmnt = au_nfsmnt(dentry->d_sb, bindex); ++ lkup.dlgt = need_dlgt(dentry->d_sb); ++ hidden_dentry = sio_lkup_one(dentry->d_name.name, hidden_parent, ++ dentry->d_name.len, &lkup); ++ //if (LktrCond) {dput(hidden_dentry); hidden_dentry = ERR_PTR(-1);} ++ err = PTR_ERR(hidden_dentry); ++ if (IS_ERR(hidden_dentry)) ++ goto out; ++ if (unlikely(hidden_dentry->d_inode)) { ++ err = -EIO; ++ IOErr("b%d %.*s should be negative.%s\n", ++ bindex, DLNPair(hidden_dentry), ++ au_flag_test(dentry->d_sb, AuFlag_UDBA_INOTIFY) ? "" : ++ " Try udba=inotify."); ++ dput(hidden_dentry); ++ goto out; ++ } ++ ++ if (bindex < dbstart(dentry)) ++ set_dbstart(dentry, bindex); ++ if (dbend(dentry) < bindex) ++ set_dbend(dentry, bindex); ++ set_h_dptr(dentry, bindex, hidden_dentry); ++ err = 0; ++ ++ out: ++ dput(parent); ++ TraceErr(err); ++ return err; ++} ++ ++/* ++ * returns the number of found hidden positive dentries, ++ * otherwise an error. 
++ */ ++int au_refresh_hdentry(struct dentry *dentry, mode_t type) ++{ ++ int npositive, pgen, new_sz, sgen, dgen; ++ struct aufs_dinfo *dinfo; ++ struct super_block *sb; ++ struct dentry *parent; ++ aufs_bindex_t bindex, parent_bend, parent_bstart, bwh, bdiropq, bend; ++ struct aufs_hdentry *p; ++ //struct nameidata nd; ++ ++ LKTRTrace("%.*s, type 0%o\n", DLNPair(dentry), type); ++ DiMustWriteLock(dentry); ++ sb = dentry->d_sb; ++ DEBUG_ON(IS_ROOT(dentry)); ++ parent = dget_parent(dentry); ++ pgen = au_digen(parent); ++ sgen = au_sigen(sb); ++ dgen = au_digen(dentry); ++ DEBUG_ON(pgen != sgen); ++ ++ npositive = -ENOMEM; ++ new_sz = sizeof(*dinfo->di_hdentry) * (sbend(sb) + 1); ++ dinfo = dtodi(dentry); ++ p = au_kzrealloc(dinfo->di_hdentry, sizeof(*p) * (dinfo->di_bend + 1), ++ new_sz, GFP_KERNEL); ++ //p = NULL; ++ if (unlikely(!p)) ++ goto out; ++ dinfo->di_hdentry = p; ++ ++ bend = dinfo->di_bend; ++ bwh = dinfo->di_bwh; ++ bdiropq = dinfo->di_bdiropq; ++ p += dinfo->di_bstart; ++ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++, p++) { ++ struct dentry *hd, *hdp; ++ struct aufs_hdentry tmp, *q; ++ aufs_bindex_t new_bindex; ++ ++ hd = p->hd_dentry; ++ if (!hd) ++ continue; ++ hdp = dget_parent(hd); ++ if (hdp == au_h_dptr_i(parent, bindex)) { ++ dput(hdp); ++ continue; ++ } ++ ++ new_bindex = au_find_dbindex(parent, hdp); ++ dput(hdp); ++ DEBUG_ON(new_bindex == bindex); ++ if (dinfo->di_bwh == bindex) ++ bwh = new_bindex; ++ if (dinfo->di_bdiropq == bindex) ++ bdiropq = new_bindex; ++ if (new_bindex < 0) { // test here ++ hdput(p); ++ p->hd_dentry = NULL; ++ continue; ++ } ++ /* swap two hidden dentries, and loop again */ ++ q = dinfo->di_hdentry + new_bindex; ++ tmp = *q; ++ *q = *p; ++ *p = tmp; ++ if (tmp.hd_dentry) { ++ bindex--; ++ p--; ++ } ++ } ++ ++ // test here ++ dinfo->di_bwh = -1; ++ if (unlikely(bwh != -1 && bwh <= sbend(sb) && sbr_is_whable(sb, bwh))) ++ dinfo->di_bwh = bwh; ++ dinfo->di_bdiropq = -1; ++ if (unlikely(bdiropq != -1 && bdiropq <= sbend(sb) ++ && sbr_is_whable(sb, bdiropq))) ++ dinfo->di_bdiropq = bdiropq; ++ parent_bend = dbend(parent); ++ p = dinfo->di_hdentry; ++ for (bindex = 0; bindex <= parent_bend; bindex++, p++) ++ if (p->hd_dentry) { ++ dinfo->di_bstart = bindex; ++ break; ++ } ++ p = dinfo->di_hdentry + parent_bend; ++ //for (bindex = parent_bend; bindex > dinfo->di_bstart; bindex--, p--) ++ for (bindex = parent_bend; bindex >= 0; bindex--, p--) ++ if (p->hd_dentry) { ++ dinfo->di_bend = bindex; ++ break; ++ } ++ ++ npositive = 0; ++ parent_bstart = dbstart(parent); ++ if (type != S_IFDIR && dinfo->di_bstart == parent_bstart) ++ goto out_dgen; /* success */ ++ ++#if 0 ++ nd.last_type = LAST_ROOT; ++ nd.flags = LOOKUP_FOLLOW; ++ nd.depth = 0; ++ nd.mnt = mntget(??); ++ nd.dentry = dget(parent); ++#endif ++ npositive = lkup_dentry(dentry, parent_bstart, type); ++ //if (LktrCond) npositive = -1; ++ if (npositive < 0) ++ goto out; ++ ++ out_dgen: ++ au_update_digen(dentry); ++ out: ++ dput(parent); ++ TraceErr(npositive); ++ return npositive; ++} ++ ++static int h_d_revalidate(struct dentry *dentry, struct nameidata *nd, ++ int do_udba) ++{ ++ int err, plus, locked, unhashed, is_root, h_plus, is_nfs; ++ struct nameidata fake_nd, *p; ++ aufs_bindex_t bindex, btail, bstart, ibs, ibe; ++ struct super_block *sb; ++ struct inode *inode, *first, *h_inode, *h_cached_inode; ++ umode_t mode, h_mode; ++ struct dentry *h_dentry; ++ int (*reval)(struct dentry *, struct nameidata *); ++ struct qstr *name; ++ ++ LKTRTrace("%.*s\n", DLNPair(dentry)); ++ 
inode = dentry->d_inode; ++ DEBUG_ON(inode && au_digen(dentry) != au_iigen(inode)); ++ //DbgDentry(dentry); ++ //DbgInode(inode); ++ ++ err = 0; ++ sb = dentry->d_sb; ++ plus = 0; ++ mode = 0; ++ first = NULL; ++ ibs = ibe = -1; ++ unhashed = d_unhashed(dentry); ++ is_root = IS_ROOT(dentry); ++ name = &dentry->d_name; ++ ++ /* ++ * Theoretically, REVAL test should be unnecessary in case of INOTIFY. ++ * But inotify doesn't fire some necessary events, ++ * IN_ATTRIB for atime/nlink/pageio ++ * IN_DELETE for NFS dentry ++ * Let's do REVAL test too. ++ */ ++ if (do_udba && inode) { ++ mode = (inode->i_mode & S_IFMT); ++ plus = (inode->i_nlink > 0); ++ first = au_h_iptr(inode); ++ ibs = ibstart(inode); ++ ibe = ibend(inode); ++ } ++ ++ btail = bstart = dbstart(dentry); ++ if (inode && S_ISDIR(inode->i_mode)) ++ btail = dbtaildir(dentry); ++ locked = 0; ++ if (nd) { ++ fake_nd = *nd; ++#ifndef CONFIG_AUFS_FAKE_DM ++ if (dentry != nd->dentry) { ++ di_read_lock_parent(nd->dentry, 0); ++ locked = 1; ++ } ++#endif ++ } ++ for (bindex = bstart; bindex <= btail; bindex++) { ++ h_dentry = au_h_dptr_i(dentry, bindex); ++ if (unlikely(!h_dentry)) ++ continue; ++ if (unlikely(do_udba ++ && !is_root ++ && (unhashed != d_unhashed(h_dentry) ++#if 1 ++ || name->len != h_dentry->d_name.len ++ || memcmp(name->name, h_dentry->d_name.name, ++ name->len) ++#endif ++ ))) { ++ LKTRTrace("unhash 0x%x 0x%x, %.*s %.*s\n", ++ unhashed, d_unhashed(h_dentry), ++ DLNPair(dentry), DLNPair(h_dentry)); ++ goto err; ++ } ++ ++ reval = NULL; ++ if (h_dentry->d_op) ++ reval = h_dentry->d_op->d_revalidate; ++ if (unlikely(reval)) { ++ //LKTRLabel(hidden reval); ++ p = fake_dm(&fake_nd, nd, sb, bindex); ++ DEBUG_ON(IS_ERR(p)); ++ err = !reval(h_dentry, p); ++ fake_dm_release(p); ++ if (unlikely(err)) { ++ //Dbg("here\n"); ++ goto err; ++ } ++ } ++ ++ if (unlikely(!do_udba)) ++ continue; ++ ++ /* UDBA tests */ ++ h_inode = h_dentry->d_inode; ++ if (unlikely(!!inode != !!h_inode)) { ++ //Dbg("here\n"); ++ goto err; ++ } ++ ++ h_plus = plus; ++ h_mode = mode; ++ h_cached_inode = h_inode; ++ is_nfs = 0; ++ if (h_inode) { ++ h_mode = (h_inode->i_mode & S_IFMT); ++ h_plus = (h_inode->i_nlink > 0); ++ } ++ if (inode && ibs <= bindex && bindex <= ibe) { ++ h_cached_inode = au_h_iptr_i(inode, bindex); ++ //is_nfs = au_is_nfs(h_cached_inode->i_sb); ++ } ++ ++ LKTRTrace("{%d, 0%o, %p}, h{%d, 0%o, %p}\n", ++ plus, mode, h_cached_inode, ++ h_plus, h_mode, h_inode); ++ if (unlikely(plus != h_plus || mode != h_mode ++ || (h_cached_inode != h_inode /* && !is_nfs */))) { ++ //Dbg("here\n"); ++ goto err; ++ } ++ continue; ++ ++ err: ++ err = -EINVAL; ++ break; ++ } ++#ifndef CONFIG_AUFS_FAKE_DM ++ if (unlikely(locked)) ++ di_read_unlock(nd->dentry, 0); ++#endif ++ ++#if 0 ++ // some filesystem uses CURRENT_TIME_SEC instead of CURRENT_TIME. ++ // NFS may stop IN_DELETE because of DCACHE_NFSFS_RENAMED. 
++#if 0 ++ && (!timespec_equal(&inode->i_ctime, &first->i_ctime) ++ || !timespec_equal(&inode->i_atime, &first->i_atime)) ++#endif ++ if (unlikely(!err && udba && first)) ++ au_cpup_attr_all(inode); ++#endif ++ ++ TraceErr(err); ++ return err; ++} ++ ++static int simple_reval_dpath(struct dentry *dentry, int sgen) ++{ ++ int err; ++ mode_t type; ++ struct dentry *parent; ++ struct inode *inode; ++ ++ LKTRTrace("%.*s, sgen %d\n", DLNPair(dentry), sgen); ++ SiMustAnyLock(dentry->d_sb); ++ DiMustWriteLock(dentry); ++ inode = dentry->d_inode; ++ DEBUG_ON(!inode); ++ ++ if (au_digen(dentry) == sgen) ++ return 0; ++ ++ parent = dget_parent(dentry); ++ di_read_lock_parent(parent, AUFS_I_RLOCK); ++ DEBUG_ON(au_digen(parent) != sgen); ++#ifdef CONFIG_AUFS_DEBUG ++ { ++ struct dentry *d = parent; ++ while (!IS_ROOT(d)) { ++ DEBUG_ON(au_digen(d) != sgen); ++ d = d->d_parent; ++ } ++ } ++#endif ++ type = (inode->i_mode & S_IFMT); ++ /* returns a number of positive dentries */ ++ err = au_refresh_hdentry(dentry, type); ++ if (err >= 0) ++ err = au_refresh_hinode(inode, dentry); ++ di_read_unlock(parent, AUFS_I_RLOCK); ++ dput(parent); ++ TraceErr(err); ++ return err; ++} ++ ++int au_reval_dpath(struct dentry *dentry, int sgen) ++{ ++ int err; ++ struct dentry *d, *parent; ++ struct inode *inode; ++ ++ LKTRTrace("%.*s, sgen %d\n", DLNPair(dentry), sgen); ++ DEBUG_ON(!dentry->d_inode); ++ DiMustWriteLock(dentry); ++ ++ if (!stosi(dentry->d_sb)->si_failed_refresh_dirs) ++ return simple_reval_dpath(dentry, sgen); ++ ++ /* slow loop, keep it simple and stupid */ ++ /* cf: cpup_dirs() */ ++ err = 0; ++ while (au_digen(dentry) != sgen) { ++ d = dentry; ++ while (1) { ++ parent = d->d_parent; // dget_parent() ++ if (au_digen(parent) == sgen) ++ break; ++ d = parent; ++ } ++ ++ inode = d->d_inode; ++ if (d != dentry) { ++ //i_lock(inode); ++ di_write_lock_child(d); ++ } ++ ++ /* someone might update our dentry while we were sleeping */ ++ if (au_digen(d) != sgen) { ++ di_read_lock_parent(parent, AUFS_I_RLOCK); ++ /* returns a number of positive dentries */ ++ err = au_refresh_hdentry(d, inode->i_mode & S_IFMT); ++ //err = -1; ++ if (err >= 0) ++ err = au_refresh_hinode(inode, d); ++ //err = -1; ++ di_read_unlock(parent, AUFS_I_RLOCK); ++ } ++ ++ if (d != dentry) { ++ di_write_unlock(d); ++ //i_unlock(inode); ++ } ++ if (unlikely(err)) ++ break; ++ } ++ ++ TraceErr(err); ++ return err; ++} ++ ++/* ++ * THIS IS A BOOLEAN FUNCTION: returns 1 if valid, 0 otherwise. ++ * nfsd passes NULL as nameidata. 
++ */ ++static int aufs_d_revalidate(struct dentry *dentry, struct nameidata *nd) ++{ ++ int valid, sgen, err, do_udba; ++ struct super_block *sb; ++ struct inode *inode; ++ ++ LKTRTrace("dentry %.*s\n", DLNPair(dentry)); ++ if (nd && nd->dentry) ++ LKTRTrace("nd %.*s\n", DLNPair(nd->dentry)); ++ //dir case: DEBUG_ON(dentry->d_parent != nd->dentry); ++ //remove failure case: DEBUG_ON(!IS_ROOT(dentry) && d_unhashed(dentry)); ++ DEBUG_ON(!dentry->d_fsdata); ++ //DbgDentry(dentry); ++ ++ err = -EINVAL; ++ inode = dentry->d_inode; ++ //DbgInode(inode); ++ sb = dentry->d_sb; ++ si_read_lock(sb); ++ sgen = au_sigen(sb); ++ if (au_digen(dentry) == sgen) ++ di_read_lock_child(dentry, !AUFS_I_RLOCK); ++ else { ++ DEBUG_ON(IS_ROOT(dentry)); ++#ifdef ForceInotify ++ Dbg("UDBA or digen, %.*s\n", DLNPair(dentry)); ++#endif ++ //i_lock(inode); ++ di_write_lock_child(dentry); ++ if (inode) ++ err = au_reval_dpath(dentry, sgen); ++ //err = -1; ++ di_downgrade_lock(dentry, AUFS_I_RLOCK); ++ //i_unlock(inode); ++ if (unlikely(err)) ++ goto out; ++ ii_read_unlock(inode); ++ DEBUG_ON(au_iigen(inode) != sgen); ++ } ++ ++ if (inode) { ++ if (au_iigen(inode) == sgen) ++ ii_read_lock_child(inode); ++ else { ++ DEBUG_ON(IS_ROOT(dentry)); ++#ifdef ForceInotify ++ Dbg("UDBA or survived, %.*s\n", DLNPair(dentry)); ++#endif ++ ii_write_lock_child(inode); ++ err = au_refresh_hinode(inode, dentry); ++ ii_downgrade_lock(inode); ++ if (unlikely(err)) ++ goto out; ++ DEBUG_ON(au_iigen(inode) != sgen); ++ } ++ } ++ ++#if 0 // fix it ++ /* parent dir i_nlink is not updated in the case of setattr */ ++ if (S_ISDIR(inode->i_mode)) { ++ i_lock(inode); ++ ii_write_lock(inode); ++ au_cpup_attr_nlink(inode); ++ ii_write_unlock(inode); ++ i_unlock(inode); ++ } ++#endif ++ ++ err = -EINVAL; ++ do_udba = !au_flag_test(sb, AuFlag_UDBA_NONE); ++ if (do_udba && inode && ibstart(inode) >= 0 ++ && au_test_higen(inode, au_h_iptr(inode))) ++ goto out; ++ err = h_d_revalidate(dentry, nd, do_udba); ++ //err = -1; ++ ++ out: ++ aufs_read_unlock(dentry, AUFS_I_RLOCK); ++ TraceErr(err); ++ valid = !err; ++ //au_debug_on(); ++ if (!valid) ++ LKTRTrace("%.*s invalid\n", DLNPair(dentry)); ++ //au_debug_off(); ++ return valid; ++} ++ ++static void aufs_d_release(struct dentry *dentry) ++{ ++ struct aufs_dinfo *dinfo; ++ aufs_bindex_t bend, bindex; ++ ++ LKTRTrace("%.*s\n", DLNPair(dentry)); ++ DEBUG_ON(!d_unhashed(dentry)); ++ ++ dinfo = dentry->d_fsdata; ++ if (unlikely(!dinfo)) ++ return; ++ ++ /* dentry may not be revalidated */ ++ bindex = dinfo->di_bstart; ++ if (bindex >= 0) { ++ struct aufs_hdentry *p; ++ bend = dinfo->di_bend; ++ DEBUG_ON(bend < bindex); ++ p = dinfo->di_hdentry + bindex; ++ while (bindex++ <= bend) { ++ if (p->hd_dentry) ++ hdput(p); ++ p++; ++ } ++ } ++ kfree(dinfo->di_hdentry); ++ cache_free_dinfo(dinfo); ++} ++ ++#if 0 ++/* it may be called at remount time, too */ ++static void aufs_d_iput(struct dentry *dentry, struct inode *inode) ++{ ++ struct super_block *sb; ++ ++ LKTRTrace("%.*s, i%lu\n", DLNPair(dentry), inode->i_ino); ++ ++ sb = dentry->d_sb; ++#if 0 ++ si_read_lock(sb); ++ if (unlikely(au_flag_test(sb, AuFlag_PLINK) ++ && au_is_plinked(sb, inode))) { ++ ii_write_lock(inode); ++ au_update_brange(inode, 1); ++ ii_write_unlock(inode); ++ } ++ si_read_unlock(sb); ++#endif ++ iput(inode); ++} ++#endif ++ ++struct dentry_operations aufs_dop = { ++ .d_revalidate = aufs_d_revalidate, ++ .d_release = aufs_d_release ++ //.d_iput = aufs_d_iput ++}; +diff --git a/fs/aufs/dentry.h b/fs/aufs/dentry.h +new file mode 100755 
+index 0000000..78049e3 +--- /dev/null ++++ b/fs/aufs/dentry.h +@@ -0,0 +1,183 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: dentry.h,v 1.25 2007/05/14 03:41:52 sfjro Exp $ */ ++ ++#ifndef __AUFS_DENTRY_H__ ++#define __AUFS_DENTRY_H__ ++ ++#ifdef __KERNEL__ ++ ++#include <linux/fs.h> ++#include <linux/aufs_type.h> ++#include "misc.h" ++ ++struct aufs_hdentry { ++ struct dentry *hd_dentry; ++}; ++ ++struct aufs_dinfo { ++ atomic_t di_generation; ++ ++ struct aufs_rwsem di_rwsem; ++ aufs_bindex_t di_bstart, di_bend, di_bwh, di_bdiropq; ++ struct aufs_hdentry *di_hdentry; ++}; ++ ++struct lkup_args { ++ struct vfsmount *nfsmnt; ++ int dlgt; ++ //struct super_block *sb; ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* dentry.c */ ++#if defined(CONFIG_AUFS_LHASH_PATCH) || defined(CONFIG_AUFS_DLGT) ++struct dentry *lkup_one(const char *name, struct dentry *parent, int len, ++ struct lkup_args *lkup); ++#else ++static inline ++struct dentry *lkup_one(const char *name, struct dentry *parent, int len, ++ struct lkup_args *lkup) ++{ ++ return lookup_one_len(name, parent, len); ++} ++#endif ++ ++extern struct dentry_operations aufs_dop; ++struct dentry *sio_lkup_one(const char *name, struct dentry *parent, int len, ++ struct lkup_args *lkup); ++int lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type); ++int lkup_neg(struct dentry *dentry, aufs_bindex_t bindex); ++int au_refresh_hdentry(struct dentry *dentry, mode_t type); ++int au_reval_dpath(struct dentry *dentry, int sgen); ++ ++/* dinfo.c */ ++int au_alloc_dinfo(struct dentry *dentry); ++struct aufs_dinfo *dtodi(struct dentry *dentry); ++ ++void di_read_lock(struct dentry *d, int flags, unsigned int lsc); ++void di_read_unlock(struct dentry *d, int flags); ++void di_downgrade_lock(struct dentry *d, int flags); ++void di_write_lock(struct dentry *d, unsigned int lsc); ++void di_write_unlock(struct dentry *d); ++void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir); ++void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir); ++void di_write_unlock2(struct dentry *d1, struct dentry *d2); ++ ++aufs_bindex_t dbstart(struct dentry *dentry); ++aufs_bindex_t dbend(struct dentry *dentry); ++aufs_bindex_t dbwh(struct dentry *dentry); ++aufs_bindex_t dbdiropq(struct dentry *dentry); ++struct dentry *au_h_dptr_i(struct dentry *dentry, aufs_bindex_t bindex); ++struct dentry *au_h_dptr(struct dentry *dentry); ++ ++aufs_bindex_t dbtail(struct dentry *dentry); ++aufs_bindex_t dbtaildir(struct dentry *dentry); ++aufs_bindex_t dbtail_generic(struct dentry *dentry); ++ ++void set_dbstart(struct dentry *dentry, aufs_bindex_t bindex); ++void set_dbend(struct dentry *dentry, aufs_bindex_t bindex); 
++void set_dbwh(struct dentry *dentry, aufs_bindex_t bindex); ++void set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex); ++void hdput(struct aufs_hdentry *hdentry); ++void set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_dentry); ++ ++void au_update_digen(struct dentry *dentry); ++void au_update_dbstart(struct dentry *dentry); ++int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry); ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline int au_digen(struct dentry *d) ++{ ++ return atomic_read(&dtodi(d)->di_generation); ++} ++ ++#ifdef CONFIG_AUFS_HINOTIFY ++static inline void au_digen_dec(struct dentry *d) ++{ ++ atomic_dec(&dtodi(d)->di_generation); ++} ++#endif /* CONFIG_AUFS_HINOTIFY */ ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* lock subclass for dinfo */ ++enum { ++ AuLsc_DI_CHILD, /* child first */ ++ AuLsc_DI_CHILD2, /* rename(2), link(2), and cpup at hinotify */ ++ AuLsc_DI_CHILD3, /* copyup dirs */ ++ AuLsc_DI_PARENT, ++ AuLsc_DI_PARENT2, ++ AuLsc_DI_PARENT3 ++}; ++ ++/* ++ * di_read_lock_child, di_write_lock_child, ++ * di_read_lock_child2, di_write_lock_child2, ++ * di_read_lock_child3, di_write_lock_child3, ++ * di_read_lock_parent, di_write_lock_parent, ++ * di_read_lock_parent2, di_write_lock_parent2, ++ * di_read_lock_parent3, di_write_lock_parent3, ++ */ ++#define ReadLockFunc(name, lsc) \ ++static inline void di_read_lock_##name(struct dentry *d, int flags) \ ++{di_read_lock(d, flags, AuLsc_DI_##lsc);} ++ ++#define WriteLockFunc(name, lsc) \ ++static inline void di_write_lock_##name(struct dentry *d) \ ++{di_write_lock(d, AuLsc_DI_##lsc);} ++ ++#define RWLockFuncs(name, lsc) \ ++ ReadLockFunc(name, lsc); \ ++ WriteLockFunc(name, lsc) ++ ++RWLockFuncs(child, CHILD); ++RWLockFuncs(child2, CHILD2); ++RWLockFuncs(child3, CHILD3); ++RWLockFuncs(parent, PARENT); ++RWLockFuncs(parent2, PARENT2); ++RWLockFuncs(parent3, PARENT3); ++ ++#undef ReadLockFunc ++#undef WriteLockFunc ++#undef RWLockFunc ++ ++/* to debug easier, do not make them inlined functions */ ++#define DiMustReadLock(d) do { \ ++ SiMustAnyLock((d)->d_sb); \ ++ RwMustReadLock(&dtodi(d)->di_rwsem); \ ++} while (0) ++ ++#define DiMustWriteLock(d) do { \ ++ SiMustAnyLock((d)->d_sb); \ ++ RwMustWriteLock(&dtodi(d)->di_rwsem); \ ++} while (0) ++ ++#define DiMustAnyLock(d) do { \ ++ SiMustAnyLock((d)->d_sb); \ ++ RwMustAnyLock(&dtodi(d)->di_rwsem); \ ++} while (0) ++ ++#define DiMustNoWaiters(d) RwMustNoWaiters(&dtodi(d)->di_rwsem) ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_DENTRY_H__ */ +diff --git a/fs/aufs/dinfo.c b/fs/aufs/dinfo.c +new file mode 100755 +index 0000000..6082149 +--- /dev/null ++++ b/fs/aufs/dinfo.c +@@ -0,0 +1,419 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: dinfo.c,v 1.23 2007/05/07 03:43:36 sfjro Exp $ */ ++ ++#include "aufs.h" ++ ++int au_alloc_dinfo(struct dentry *dentry) ++{ ++ struct aufs_dinfo *dinfo; ++ struct super_block *sb; ++ int nbr; ++ ++ LKTRTrace("%.*s\n", DLNPair(dentry)); ++ DEBUG_ON(dentry->d_fsdata); ++ ++ dinfo = cache_alloc_dinfo(); ++ //if (LktrCond) {cache_free_dinfo(dinfo); dinfo = NULL;} ++ if (dinfo) { ++ sb = dentry->d_sb; ++ nbr = sbend(sb) + 1; ++ if (unlikely(!nbr)) ++ nbr++; ++ dinfo->di_hdentry = kcalloc(nbr, sizeof(*dinfo->di_hdentry), ++ GFP_KERNEL); ++ //if (LktrCond) ++ //{kfree(dinfo->di_hdentry); dinfo->di_hdentry = NULL;} ++ if (dinfo->di_hdentry) { ++ rw_init_wlock_nested(&dinfo->di_rwsem, AuLsc_DI_PARENT); ++ dinfo->di_bstart = dinfo->di_bend = -1; ++ dinfo->di_bwh = dinfo->di_bdiropq = -1; ++ atomic_set(&dinfo->di_generation, au_sigen(sb)); ++ ++ dentry->d_fsdata = dinfo; ++ dentry->d_op = &aufs_dop; ++ return 0; /* success */ ++ } ++ cache_free_dinfo(dinfo); ++ } ++ TraceErr(-ENOMEM); ++ return -ENOMEM; ++} ++ ++struct aufs_dinfo *dtodi(struct dentry *dentry) ++{ ++ struct aufs_dinfo *dinfo = dentry->d_fsdata; ++ DEBUG_ON(!dinfo ++ || !dinfo->di_hdentry ++ /* || stosi(dentry->d_sb)->si_bend < dinfo->di_bend */ ++ || dinfo->di_bend < dinfo->di_bstart ++ /* dbwh can be outside of this range */ ++ || (0 <= dinfo->di_bdiropq ++ && (dinfo->di_bdiropq < dinfo->di_bstart ++ /* || dinfo->di_bend < dinfo->di_bdiropq */)) ++ ); ++ return dinfo; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static void do_ii_write_lock(struct inode *inode, unsigned int lsc) ++{ ++ switch (lsc) { ++ case AuLsc_DI_CHILD: ++ ii_write_lock_child(inode); ++ break; ++ case AuLsc_DI_CHILD2: ++ ii_write_lock_child2(inode); ++ break; ++ case AuLsc_DI_CHILD3: ++ ii_write_lock_child3(inode); ++ break; ++ case AuLsc_DI_PARENT: ++ ii_write_lock_parent(inode); ++ break; ++ case AuLsc_DI_PARENT2: ++ ii_write_lock_parent2(inode); ++ break; ++ case AuLsc_DI_PARENT3: ++ ii_write_lock_parent3(inode); ++ break; ++ default: ++ BUG(); ++ } ++} ++ ++static void do_ii_read_lock(struct inode *inode, unsigned int lsc) ++{ ++ switch (lsc) { ++ case AuLsc_DI_CHILD: ++ ii_read_lock_child(inode); ++ break; ++ case AuLsc_DI_CHILD2: ++ ii_read_lock_child2(inode); ++ break; ++ case AuLsc_DI_CHILD3: ++ ii_read_lock_child3(inode); ++ break; ++ case AuLsc_DI_PARENT: ++ ii_read_lock_parent(inode); ++ break; ++ case AuLsc_DI_PARENT2: ++ ii_read_lock_parent2(inode); ++ break; ++ case AuLsc_DI_PARENT3: ++ ii_read_lock_parent3(inode); ++ break; ++ default: ++ BUG(); ++ } ++} ++ ++void di_read_lock(struct dentry *d, int flags, unsigned int lsc) ++{ ++ SiMustAnyLock(d->d_sb); ++ // todo: always nested? 
++ rw_read_lock_nested(&dtodi(d)->di_rwsem, lsc); ++ if (d->d_inode) { ++ if (flags & AUFS_I_WLOCK) ++ do_ii_write_lock(d->d_inode, lsc); ++ else if (flags & AUFS_I_RLOCK) ++ do_ii_read_lock(d->d_inode, lsc); ++ } ++} ++ ++void di_read_unlock(struct dentry *d, int flags) ++{ ++ SiMustAnyLock(d->d_sb); ++ if (d->d_inode) { ++ if (flags & AUFS_I_WLOCK) ++ ii_write_unlock(d->d_inode); ++ else if (flags & AUFS_I_RLOCK) ++ ii_read_unlock(d->d_inode); ++ } ++ rw_read_unlock(&dtodi(d)->di_rwsem); ++} ++ ++void di_downgrade_lock(struct dentry *d, int flags) ++{ ++ SiMustAnyLock(d->d_sb); ++ rw_dgrade_lock(&dtodi(d)->di_rwsem); ++ if (d->d_inode && (flags & AUFS_I_RLOCK)) ++ ii_downgrade_lock(d->d_inode); ++} ++ ++void di_write_lock(struct dentry *d, unsigned int lsc) ++{ ++ SiMustAnyLock(d->d_sb); ++ // todo: always nested? ++ rw_write_lock_nested(&dtodi(d)->di_rwsem, lsc); ++ if (d->d_inode) ++ do_ii_write_lock(d->d_inode, lsc); ++} ++ ++void di_write_unlock(struct dentry *d) ++{ ++ SiMustAnyLock(d->d_sb); ++ if (d->d_inode) ++ ii_write_unlock(d->d_inode); ++ rw_write_unlock(&dtodi(d)->di_rwsem); ++} ++ ++void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir) ++{ ++ struct dentry *d; ++ ++ TraceEnter(); ++ DEBUG_ON(d1 == d2 ++ || d1->d_inode == d2->d_inode ++ || d1->d_sb != d2->d_sb); ++ ++ if (isdir) ++ for (d = d1; d->d_parent != d; d = d->d_parent) // dget_parent() ++ if (d->d_parent == d2) { ++ di_write_lock_child(d1); ++ di_write_lock_child2(d2); ++ return; ++ } ++ ++ di_write_lock_child(d2); ++ di_write_lock_child2(d1); ++} ++ ++void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir) ++{ ++ struct dentry *d; ++ ++ TraceEnter(); ++ DEBUG_ON(d1 == d2 ++ || d1->d_inode == d2->d_inode ++ || d1->d_sb != d2->d_sb); ++ ++ if (isdir) ++ for (d = d1; d->d_parent != d; d = d->d_parent) // dget_parent() ++ if (d->d_parent == d2) { ++ di_write_lock_parent(d1); ++ di_write_lock_parent2(d2); ++ return; ++ } ++ ++ di_write_lock_parent(d2); ++ di_write_lock_parent2(d1); ++} ++ ++void di_write_unlock2(struct dentry *d1, struct dentry *d2) ++{ ++ di_write_unlock(d1); ++ if (d1->d_inode == d2->d_inode) ++ rw_write_unlock(&dtodi(d2)->di_rwsem); ++ else ++ di_write_unlock(d2); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++aufs_bindex_t dbstart(struct dentry *dentry) ++{ ++ DiMustAnyLock(dentry); ++ return dtodi(dentry)->di_bstart; ++} ++ ++aufs_bindex_t dbend(struct dentry *dentry) ++{ ++ DiMustAnyLock(dentry); ++ return dtodi(dentry)->di_bend; ++} ++ ++aufs_bindex_t dbwh(struct dentry *dentry) ++{ ++ DiMustAnyLock(dentry); ++ return dtodi(dentry)->di_bwh; ++} ++ ++aufs_bindex_t dbdiropq(struct dentry *dentry) ++{ ++ DiMustAnyLock(dentry); ++ DEBUG_ON(dentry->d_inode ++ && dentry->d_inode->i_mode ++ && !S_ISDIR(dentry->d_inode->i_mode)); ++ return dtodi(dentry)->di_bdiropq; ++} ++ ++struct dentry *au_h_dptr_i(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ struct dentry *d; ++ ++ DiMustAnyLock(dentry); ++ if (dbstart(dentry) < 0 || bindex < dbstart(dentry)) ++ return NULL; ++ DEBUG_ON(bindex < 0 ++ /* || bindex > sbend(dentry->d_sb) */); ++ d = dtodi(dentry)->di_hdentry[0 + bindex].hd_dentry; ++ DEBUG_ON(d && (atomic_read(&d->d_count) <= 0)); ++ return d; ++} ++ ++struct dentry *au_h_dptr(struct dentry *dentry) ++{ ++ return au_h_dptr_i(dentry, dbstart(dentry)); ++} ++ ++aufs_bindex_t dbtail(struct dentry *dentry) ++{ ++ aufs_bindex_t bend, bwh; ++ ++ bend = dbend(dentry); ++ if (0 <= bend) { ++ bwh = dbwh(dentry); ++ 
//DEBUG_ON(bend < bwh); ++ if (!bwh) ++ return bwh; ++ if (0 < bwh && bwh < bend) ++ return bwh - 1; ++ } ++ return bend; ++} ++ ++aufs_bindex_t dbtaildir(struct dentry *dentry) ++{ ++ aufs_bindex_t bend, bopq; ++ ++ DEBUG_ON(dentry->d_inode ++ && dentry->d_inode->i_mode ++ && !S_ISDIR(dentry->d_inode->i_mode)); ++ ++ bend = dbtail(dentry); ++ if (0 <= bend) { ++ bopq = dbdiropq(dentry); ++ DEBUG_ON(bend < bopq); ++ if (0 <= bopq && bopq < bend) ++ bend = bopq; ++ } ++ return bend; ++} ++ ++aufs_bindex_t dbtail_generic(struct dentry *dentry) ++{ ++ struct inode *inode; ++ ++ inode = dentry->d_inode; ++ if (inode && S_ISDIR(inode->i_mode)) ++ return dbtaildir(dentry); ++ else ++ return dbtail(dentry); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++// hard/soft set ++void set_dbstart(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ DiMustWriteLock(dentry); ++ DEBUG_ON(sbend(dentry->d_sb) < bindex); ++ /* */ ++ dtodi(dentry)->di_bstart = bindex; ++} ++ ++void set_dbend(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ DiMustWriteLock(dentry); ++ DEBUG_ON(sbend(dentry->d_sb) < bindex ++ || bindex < dbstart(dentry)); ++ dtodi(dentry)->di_bend = bindex; ++} ++ ++void set_dbwh(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ DiMustWriteLock(dentry); ++ DEBUG_ON(sbend(dentry->d_sb) < bindex); ++ /* dbwh can be outside of bstart - bend range */ ++ dtodi(dentry)->di_bwh = bindex; ++} ++ ++void set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ DiMustWriteLock(dentry); ++ DEBUG_ON(sbend(dentry->d_sb) < bindex); ++ DEBUG_ON((bindex != -1 ++ && (bindex < dbstart(dentry) || dbend(dentry) < bindex)) ++ || (dentry->d_inode ++ && dentry->d_inode->i_mode ++ && !S_ISDIR(dentry->d_inode->i_mode))); ++ dtodi(dentry)->di_bdiropq = bindex; ++} ++ ++void hdput(struct aufs_hdentry *hd) ++{ ++ dput(hd->hd_dentry); ++} ++ ++void set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_dentry) ++{ ++ struct aufs_hdentry *hd = dtodi(dentry)->di_hdentry + bindex; ++ DiMustWriteLock(dentry); ++ DEBUG_ON(bindex < dtodi(dentry)->di_bstart ++ || bindex > dtodi(dentry)->di_bend ++ || (h_dentry && atomic_read(&h_dentry->d_count) <= 0) ++ || (h_dentry && hd->hd_dentry) ++ ); ++ if (hd->hd_dentry) ++ hdput(hd); ++ hd->hd_dentry = h_dentry; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_update_digen(struct dentry *dentry) ++{ ++ //DiMustWriteLock(dentry); ++ DEBUG_ON(!dentry->d_sb); ++ atomic_set(&dtodi(dentry)->di_generation, au_sigen(dentry->d_sb)); ++} ++ ++void au_update_dbstart(struct dentry *dentry) ++{ ++ aufs_bindex_t bindex, bstart = dbstart(dentry), bend = dbend(dentry); ++ struct dentry *hidden_dentry; ++ ++ DiMustWriteLock(dentry); ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ hidden_dentry = au_h_dptr_i(dentry, bindex); ++ if (!hidden_dentry) ++ continue; ++ if (hidden_dentry->d_inode) { ++ set_dbstart(dentry, bindex); ++ return; ++ } ++ set_h_dptr(dentry, bindex, NULL); ++ } ++ //set_dbstart(dentry, -1); ++ //set_dbend(dentry, -1); ++} ++ ++int au_find_dbindex(struct dentry *dentry, struct dentry *hidden_dentry) ++{ ++ aufs_bindex_t bindex, bend; ++ ++ bend = dbend(dentry); ++ for (bindex = dbstart(dentry); bindex <= bend; bindex++) ++ if (au_h_dptr_i(dentry, bindex) == hidden_dentry) ++ return bindex; ++ return -1; ++} +diff --git a/fs/aufs/dir.c b/fs/aufs/dir.c +new file mode 100755 +index 0000000..9afb1a9 +--- /dev/null ++++ b/fs/aufs/dir.c +@@ -0,0 +1,564 @@ ++/* ++ 
* Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: dir.c,v 1.36 2007/05/14 03:38:52 sfjro Exp $ */ ++ ++#include "aufs.h" ++ ++static int reopen_dir(struct file *file) ++{ ++ int err; ++ struct dentry *dentry, *hidden_dentry; ++ aufs_bindex_t bindex, btail, bstart; ++ struct file *hidden_file; ++ ++ dentry = file->f_dentry; ++ LKTRTrace("%.*s\n", DLNPair(dentry)); ++ DEBUG_ON(!S_ISDIR(dentry->d_inode->i_mode)); ++ ++ /* open all hidden dirs */ ++ bstart = dbstart(dentry); ++#if 1 ++ for (bindex = fbstart(file); bindex < bstart; bindex++) ++ set_h_fptr(file, bindex, NULL); ++#endif ++ set_fbstart(file, bstart); ++ btail = dbtaildir(dentry); ++#if 1 ++ for (bindex = fbend(file); btail < bindex; bindex--) ++ set_h_fptr(file, bindex, NULL); ++#endif ++ set_fbend(file, btail); ++ for (bindex = bstart; bindex <= btail; bindex++) { ++ hidden_dentry = au_h_dptr_i(dentry, bindex); ++ if (!hidden_dentry) ++ continue; ++ hidden_file = au_h_fptr_i(file, bindex); ++ if (hidden_file) { ++ DEBUG_ON(hidden_file->f_dentry != hidden_dentry); ++ continue; ++ } ++ ++ hidden_file = hidden_open(dentry, bindex, file->f_flags); ++ // unavailable ++ //if (LktrCond) {fput(hidden_file); ++ //br_put(stobr(dentry->d_sb, bindex));hidden_file=ERR_PTR(-1);} ++ err = PTR_ERR(hidden_file); ++ if (IS_ERR(hidden_file)) ++ goto out; // close all? 
++ //cpup_file_flags(hidden_file, file); ++ set_h_fptr(file, bindex, hidden_file); ++ } ++ err = 0; ++ ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++static int do_open_dir(struct file *file, int flags) ++{ ++ int err; ++ aufs_bindex_t bindex, btail; ++ struct dentry *dentry, *hidden_dentry; ++ struct file *hidden_file; ++ ++ dentry = file->f_dentry; ++ LKTRTrace("%.*s, 0x%x\n", DLNPair(dentry), flags); ++ DEBUG_ON(!dentry->d_inode || !S_ISDIR(dentry->d_inode->i_mode)); ++ ++ err = 0; ++ set_fvdir_cache(file, NULL); ++ file->f_version = dentry->d_inode->i_version; ++ bindex = dbstart(dentry); ++ set_fbstart(file, bindex); ++ btail = dbtaildir(dentry); ++ set_fbend(file, btail); ++ for (; !err && bindex <= btail; bindex++) { ++ hidden_dentry = au_h_dptr_i(dentry, bindex); ++ if (!hidden_dentry) ++ continue; ++ ++ hidden_file = hidden_open(dentry, bindex, flags); ++ //if (LktrCond) {fput(hidden_file); ++ //br_put(stobr(dentry->d_sb, bindex));hidden_file=ERR_PTR(-1);} ++ if (!IS_ERR(hidden_file)) { ++ set_h_fptr(file, bindex, hidden_file); ++ continue; ++ } ++ err = PTR_ERR(hidden_file); ++ } ++ if (!err) ++ return 0; /* success */ ++ ++ /* close all */ ++ for (bindex = fbstart(file); !err && bindex <= btail; bindex++) ++ set_h_fptr(file, bindex, NULL); ++ set_fbstart(file, -1); ++ set_fbend(file, -1); ++ return err; ++} ++ ++static int aufs_open_dir(struct inode *inode, struct file *file) ++{ ++ return au_do_open(inode, file, do_open_dir); ++} ++ ++static int aufs_release_dir(struct inode *inode, struct file *file) ++{ ++ struct aufs_vdir *vdir_cache; ++ struct super_block *sb; ++ ++ LKTRTrace("i%lu, %.*s\n", inode->i_ino, DLNPair(file->f_dentry)); ++ ++ sb = file->f_dentry->d_sb; ++ si_read_lock(sb); ++ fi_write_lock(file); ++ vdir_cache = fvdir_cache(file); ++ if (vdir_cache) ++ free_vdir(vdir_cache); ++ fi_write_unlock(file); ++ au_fin_finfo(file); ++ si_read_unlock(sb); ++ return 0; ++} ++ ++static int fsync_dir(struct dentry *dentry, int datasync) ++{ ++ int err; ++ struct inode *inode; ++ struct super_block *sb; ++ aufs_bindex_t bend, bindex; ++ ++ LKTRTrace("%.*s, %d\n", DLNPair(dentry), datasync); ++ DiMustAnyLock(dentry); ++ sb = dentry->d_sb; ++ SiMustAnyLock(sb); ++ inode = dentry->d_inode; ++ IMustLock(inode); ++ IiMustAnyLock(inode); ++ ++ err = 0; ++ bend = dbend(dentry); ++ for (bindex = dbstart(dentry); !err && bindex <= bend; bindex++) { ++ struct dentry *h_dentry; ++ struct inode *h_inode; ++ struct file_operations *fop; ++ ++ if (test_ro(sb, bindex, inode)) ++ continue; ++ h_dentry = au_h_dptr_i(dentry, bindex); ++ if (!h_dentry) ++ continue; ++ h_inode = h_dentry->d_inode; ++ if (!h_inode) ++ continue; ++ ++ /* cf. 
fs/nsfd/vfs.c and fs/nfsd/nfs4recover.c */ ++ //hdir_lock(h_inode, inode, bindex); ++ i_lock(h_inode); ++ fop = (void*)h_inode->i_fop; ++ err = filemap_fdatawrite(h_inode->i_mapping); ++ if (!err && fop && fop->fsync) ++ err = fop->fsync(NULL, h_dentry, datasync); ++ if (!err) ++ err = filemap_fdatawrite(h_inode->i_mapping); ++ //hdir_unlock(h_inode, inode, bindex); ++ i_unlock(h_inode); ++ } ++ ++ TraceErr(err); ++ return err; ++} ++ ++/* ++ * @file may be NULL ++ */ ++static int aufs_fsync_dir(struct file *file, struct dentry *dentry, ++ int datasync) ++{ ++ int err; ++ struct inode *inode; ++ struct file *hidden_file; ++ struct super_block *sb; ++ aufs_bindex_t bend, bindex; ++ ++ LKTRTrace("%.*s, %d\n", DLNPair(dentry), datasync); ++ inode = dentry->d_inode; ++ IMustLock(inode); ++ ++ err = 0; ++ sb = dentry->d_sb; ++ si_read_lock(sb); ++ if (file) { ++ err = au_reval_and_lock_finfo(file, reopen_dir, /*wlock*/1, ++ /*locked*/1); ++ //err = -1; ++ if (unlikely(err)) ++ goto out; ++ } else ++ di_read_lock_child(dentry, !AUFS_I_WLOCK); ++ ++ ii_write_lock_child(inode); ++ if (file) { ++ bend = fbend(file); ++ for (bindex = fbstart(file); !err && bindex <= bend; bindex++) { ++ hidden_file = au_h_fptr_i(file, bindex); ++ if (!hidden_file || test_ro(sb, bindex, inode)) ++ continue; ++ ++ err = -EINVAL; ++ if (hidden_file->f_op && hidden_file->f_op->fsync) { ++ // todo: try do_fsync() in fs/sync.c ++#if 0 ++ DEBUG_ON(hidden_file->f_dentry->d_inode ++ != au_h_iptr_i(inode, bindex)); ++ hdir_lock(hidden_file->f_dentry->d_inode, inode, ++ bindex); ++#else ++ i_lock(hidden_file->f_dentry->d_inode); ++#endif ++ err = hidden_file->f_op->fsync ++ (hidden_file, hidden_file->f_dentry, ++ datasync); ++ //err = -1; ++#if 0 ++ hdir_unlock(hidden_file->f_dentry->d_inode, ++ inode, bindex); ++#else ++ i_unlock(hidden_file->f_dentry->d_inode); ++#endif ++ } ++ } ++ } else ++ err = fsync_dir(dentry, datasync); ++ au_cpup_attr_timesizes(inode); ++ ii_write_unlock(inode); ++ if (file) ++ fi_write_unlock(file); ++ else ++ di_read_unlock(dentry, !AUFS_I_WLOCK); ++ ++ out: ++ si_read_unlock(sb); ++ TraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int aufs_readdir(struct file *file, void *dirent, filldir_t filldir) ++{ ++ int err; ++ struct dentry *dentry; ++ struct inode *inode; ++ struct super_block *sb; ++ ++ dentry = file->f_dentry; ++ LKTRTrace("%.*s, pos %Ld\n", DLNPair(dentry), file->f_pos); ++ inode = dentry->d_inode; ++ IMustLock(inode); ++ ++ au_nfsd_lockdep_off(); ++ sb = dentry->d_sb; ++ si_read_lock(sb); ++ err = au_reval_and_lock_finfo(file, reopen_dir, /*wlock*/1, ++ /*locked*/1); ++ if (unlikely(err)) ++ goto out; ++ ++ ii_write_lock_child(inode); ++ err = au_init_vdir(file); ++ if (unlikely(err)) { ++ ii_write_unlock(inode); ++ goto out_unlock; ++ } ++ //DbgVdir(fvdir_cache(file));// goto out_unlock; ++ ++ /* nfsd filldir calls lookup_one_len(). 
*/ ++ ii_downgrade_lock(inode); ++ err = au_fill_de(file, dirent, filldir); ++ //DbgVdir(fvdir_cache(file));// goto out_unlock; ++ ++ inode->i_atime = au_h_iptr(inode)->i_atime; ++ ii_read_unlock(inode); ++ ++ out_unlock: ++ fi_write_unlock(file); ++ out: ++ si_read_unlock(sb); ++ au_nfsd_lockdep_on(); ++#if 0 // debug ++ if (LktrCond) ++ igrab(inode); ++#endif ++ TraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct test_empty_arg { ++ struct aufs_nhash *whlist; ++ int whonly; ++ aufs_bindex_t bindex; ++ int err, called; ++}; ++ ++static int test_empty_cb(void *__arg, const char *__name, int namelen, ++ loff_t offset, filldir_ino_t ino, unsigned int d_type) ++{ ++ struct test_empty_arg *arg = __arg; ++ char *name = (void*)__name; ++ ++ LKTRTrace("%.*s\n", namelen, name); ++ ++ arg->err = 0; ++ arg->called++; ++ //smp_mb(); ++ if (name[0] == '.' ++ && (namelen == 1 || (name[1] == '.' && namelen == 2))) ++ return 0; /* success */ ++ ++ if (namelen <= AUFS_WH_PFX_LEN ++ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) { ++ if (arg->whonly && !test_known_wh(arg->whlist, name, namelen)) ++ arg->err = -ENOTEMPTY; ++ goto out; ++ } ++ ++ name += AUFS_WH_PFX_LEN; ++ namelen -= AUFS_WH_PFX_LEN; ++ if (!test_known_wh(arg->whlist, name, namelen)) ++ arg->err = append_wh(arg->whlist, name, namelen, arg->bindex); ++ ++ out: ++ //smp_mb(); ++ TraceErr(arg->err); ++ return arg->err; ++} ++ ++static int do_test_empty(struct dentry *dentry, struct test_empty_arg *arg) ++{ ++ int err, dlgt; ++ struct file *hidden_file; ++ ++ LKTRTrace("%.*s, {%p, %d, %d}\n", ++ DLNPair(dentry), arg->whlist, arg->whonly, arg->bindex); ++ ++ hidden_file = hidden_open(dentry, arg->bindex, ++ O_RDONLY | O_NONBLOCK | O_DIRECTORY ++ | O_LARGEFILE); ++ err = PTR_ERR(hidden_file); ++ if (IS_ERR(hidden_file)) ++ goto out; ++ ++ dlgt = need_dlgt(dentry->d_sb); ++ //hidden_file->f_pos = 0; ++ do { ++ arg->err = 0; ++ arg->called = 0; ++ //smp_mb(); ++ err = vfsub_readdir(hidden_file, test_empty_cb, arg, dlgt); ++ if (err >= 0) ++ err = arg->err; ++ } while (!err && arg->called); ++ fput(hidden_file); ++ sbr_put(dentry->d_sb, arg->bindex); ++ ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++struct do_test_empty_args { ++ int *errp; ++ struct dentry *dentry; ++ struct test_empty_arg *arg; ++}; ++ ++static void call_do_test_empty(void *args) ++{ ++ struct do_test_empty_args *a = args; ++ *a->errp = do_test_empty(a->dentry, a->arg); ++} ++ ++static int sio_test_empty(struct dentry *dentry, struct test_empty_arg *arg) ++{ ++ int err; ++ struct dentry *hidden_dentry; ++ struct inode *hidden_inode; ++ ++ LKTRTrace("%.*s\n", DLNPair(dentry)); ++ hidden_dentry = au_h_dptr_i(dentry, arg->bindex); ++ DEBUG_ON(!hidden_dentry); ++ hidden_inode = hidden_dentry->d_inode; ++ DEBUG_ON(!hidden_inode || !S_ISDIR(hidden_inode->i_mode)); ++ ++ hi_lock_child(hidden_inode); ++ err = au_test_perm(hidden_inode, MAY_EXEC | MAY_READ, ++ need_dlgt(dentry->d_sb)); ++ i_unlock(hidden_inode); ++ if (!err) ++ err = do_test_empty(dentry, arg); ++ else { ++ struct do_test_empty_args args = { ++ .errp = &err, ++ .dentry = dentry, ++ .arg = arg ++ }; ++ au_wkq_wait(call_do_test_empty, &args, /*dlgt*/0); ++ } ++ ++ TraceErr(err); ++ return err; ++} ++ ++int au_test_empty_lower(struct dentry *dentry) ++{ ++ int err; ++ struct inode *inode; ++ struct test_empty_arg arg; ++ struct aufs_nhash *whlist; ++ aufs_bindex_t bindex, bstart, btail; ++ ++ LKTRTrace("%.*s\n", DLNPair(dentry)); ++ inode = 
dentry->d_inode; ++ DEBUG_ON(!inode || !S_ISDIR(inode->i_mode)); ++ ++ whlist = nhash_new(GFP_KERNEL); ++ err = PTR_ERR(whlist); ++ if (IS_ERR(whlist)) ++ goto out; ++ ++ bstart = dbstart(dentry); ++ arg.whlist = whlist; ++ arg.whonly = 0; ++ arg.bindex = bstart; ++ err = do_test_empty(dentry, &arg); ++ if (unlikely(err)) ++ goto out_whlist; ++ ++ arg.whonly = 1; ++ btail = dbtaildir(dentry); ++ for (bindex = bstart + 1; !err && bindex <= btail; bindex++) { ++ struct dentry *hidden_dentry; ++ hidden_dentry = au_h_dptr_i(dentry, bindex); ++ if (hidden_dentry && hidden_dentry->d_inode) { ++ DEBUG_ON(!S_ISDIR(hidden_dentry->d_inode->i_mode)); ++ arg.bindex = bindex; ++ err = do_test_empty(dentry, &arg); ++ } ++ } ++ ++ out_whlist: ++ nhash_del(whlist); ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++int test_empty(struct dentry *dentry, struct aufs_nhash *whlist) ++{ ++ int err; ++ struct inode *inode; ++ struct test_empty_arg arg; ++ aufs_bindex_t bindex, btail; ++ ++ LKTRTrace("%.*s\n", DLNPair(dentry)); ++ inode = dentry->d_inode; ++ DEBUG_ON(!inode || !S_ISDIR(inode->i_mode)); ++ ++ err = 0; ++ arg.whlist = whlist; ++ arg.whonly = 1; ++ btail = dbtaildir(dentry); ++ for (bindex = dbstart(dentry); !err && bindex <= btail; bindex++) { ++ struct dentry *hidden_dentry; ++ hidden_dentry = au_h_dptr_i(dentry, bindex); ++ if (hidden_dentry && hidden_dentry->d_inode) { ++ DEBUG_ON(!S_ISDIR(hidden_dentry->d_inode->i_mode)); ++ arg.bindex = bindex; ++ err = sio_test_empty(dentry, &arg); ++ } ++ } ++ ++ TraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void au_add_nlink(struct inode *dir, struct inode *h_dir) ++{ ++ DEBUG_ON(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode)); ++ dir->i_nlink += h_dir->i_nlink - 2; ++ if (unlikely(h_dir->i_nlink < 2)) ++ dir->i_nlink += 2; ++} ++ ++void au_sub_nlink(struct inode *dir, struct inode *h_dir) ++{ ++ DEBUG_ON(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode)); ++ dir->i_nlink -= h_dir->i_nlink - 2; ++ if (unlikely(h_dir->i_nlink < 2)) ++ dir->i_nlink -= 2; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++#if 0 // comment ++struct file_operations { ++ struct module *owner; ++ loff_t (*llseek) (struct file *, loff_t, int); ++ ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); ++ ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t); ++ ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); ++ ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t, loff_t); ++ int (*readdir) (struct file *, void *, filldir_t); ++ unsigned int (*poll) (struct file *, struct poll_table_struct *); ++ int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long); ++ long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); ++ long (*compat_ioctl) (struct file *, unsigned int, unsigned long); ++ int (*mmap) (struct file *, struct vm_area_struct *); ++ int (*open) (struct inode *, struct file *); ++ int (*flush) (struct file *); ++ int (*release) (struct inode *, struct file *); ++ int (*fsync) (struct file *, struct dentry *, int datasync); ++ int (*aio_fsync) (struct kiocb *, int datasync); ++ int (*fasync) (int, struct file *, int); ++ int (*lock) (struct file *, int, struct file_lock *); ++ ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, loff_t *); ++ ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, loff_t *); ++ ssize_t (*sendfile) (struct 
file *, loff_t *, size_t, read_actor_t, void *); ++ ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); ++ unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); ++ int (*check_flags)(int); ++ int (*dir_notify)(struct file *file, unsigned long arg); ++ int (*flock) (struct file *, int, struct file_lock *); ++}; ++#endif ++ ++struct file_operations aufs_dir_fop = { ++ .read = generic_read_dir, ++ .readdir = aufs_readdir, ++ .open = aufs_open_dir, ++ .release = aufs_release_dir, ++ .flush = aufs_flush, ++ .fsync = aufs_fsync_dir, ++}; +diff --git a/fs/aufs/dir.h b/fs/aufs/dir.h +new file mode 100755 +index 0000000..3ddf309 +--- /dev/null ++++ b/fs/aufs/dir.h +@@ -0,0 +1,125 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: dir.h,v 1.18 2007/05/14 03:41:52 sfjro Exp $ */ ++ ++#ifndef __AUFS_DIR_H__ ++#define __AUFS_DIR_H__ ++ ++#ifdef __KERNEL__ ++ ++#include <linux/fs.h> ++#include <linux/version.h> ++#include <linux/aufs_type.h> ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,19) ++#define filldir_ino_t u64 ++#else ++#define filldir_ino_t ino_t ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* need to be faster and smaller */ ++ ++#define AUFS_DEBLK_SIZE 512 // todo: changable ++#define AUFS_NHASH_SIZE 32 // todo: changable ++#if AUFS_DEBLK_SIZE < NAME_MAX || PAGE_SIZE < AUFS_DEBLK_SIZE ++#error invalid size AUFS_DEBLK_SIZE ++#endif ++ ++typedef char aufs_deblk_t[AUFS_DEBLK_SIZE]; ++ ++struct aufs_nhash { ++ struct hlist_head heads[AUFS_NHASH_SIZE]; ++}; ++ ++struct aufs_destr { ++ unsigned char len; ++ char name[0]; ++} __attribute__ ((packed)); ++ ++struct aufs_dehstr { ++ struct hlist_node hash; ++ struct aufs_destr *str; ++}; ++ ++struct aufs_de { ++ ino_t de_ino; ++ unsigned char de_type; ++ //caution: packed ++ struct aufs_destr de_str; ++} __attribute__ ((packed)); ++ ++struct aufs_wh { ++ struct hlist_node wh_hash; ++ aufs_bindex_t wh_bindex; ++ struct aufs_destr wh_str; ++} __attribute__ ((packed)); ++ ++union aufs_deblk_p { ++ unsigned char *p; ++ aufs_deblk_t *deblk; ++ struct aufs_de *de; ++}; ++ ++struct aufs_vdir { ++ aufs_deblk_t **vd_deblk; ++ int vd_nblk; ++ struct { ++ int i; ++ union aufs_deblk_p p; ++ } vd_last; ++ ++ unsigned long vd_version; ++ unsigned long vd_jiffy; ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* dir.c */ ++extern struct file_operations aufs_dir_fop; ++int au_test_empty_lower(struct dentry *dentry); ++int test_empty(struct dentry *dentry, struct aufs_nhash *whlist); ++void au_add_nlink(struct inode *dir, struct inode *h_dir); ++void au_sub_nlink(struct inode *dir, struct inode *h_dir); ++ ++/* vdir.c */ 
++struct aufs_nhash *nhash_new(gfp_t gfp); ++void nhash_del(struct aufs_nhash *nhash); ++void nhash_init(struct aufs_nhash *nhash); ++void nhash_move(struct aufs_nhash *dst, struct aufs_nhash *src); ++void nhash_fin(struct aufs_nhash *nhash); ++int is_longer_wh(struct aufs_nhash *whlist, aufs_bindex_t btgt, int limit); ++int test_known_wh(struct aufs_nhash *whlist, char *name, int namelen); ++int append_wh(struct aufs_nhash *whlist, char *name, int namelen, ++ aufs_bindex_t bindex); ++void free_vdir(struct aufs_vdir *vdir); ++int au_init_vdir(struct file *file); ++int au_fill_de(struct file *file, void *dirent, filldir_t filldir); ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline ++unsigned int au_name_hash(const unsigned char *name, unsigned int len) ++{ ++ return (full_name_hash(name, len) % AUFS_NHASH_SIZE); ++} ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_DIR_H__ */ +diff --git a/fs/aufs/export.c b/fs/aufs/export.c +new file mode 100755 +index 0000000..7b1c6ac +--- /dev/null ++++ b/fs/aufs/export.c +@@ -0,0 +1,585 @@ ++/* ++ * Copyright (C) 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: export.c,v 1.7 2007/05/14 03:38:24 sfjro Exp $ */ ++ ++#include "aufs.h" ++ ++extern struct export_operations export_op_default; ++#define CALL(ops, func) (((ops)->func) ? 
((ops)->func) : export_op_default.func) ++#define is_anon(d) ((d)->d_flags & DCACHE_DISCONNECTED) ++ ++union conv { ++#if BITS_PER_LONG == 32 ++ __u32 a[1]; ++#else ++ __u32 a[2]; ++#endif ++ ino_t ino; ++}; ++ ++static ino_t decode_ino(__u32 *a) ++{ ++ union conv u; ++ u.a[0] = a[0]; ++#if BITS_PER_LONG == 64 ++ u.a[1] = a[1]; ++#endif ++ return u.ino; ++} ++ ++static void encode_ino(__u32 *a, ino_t ino) ++{ ++ union conv u; ++ u.ino = ino; ++ a[0] = u.a[0]; ++#if BITS_PER_LONG == 64 ++ a[1] = u.a[1]; ++#endif ++} ++ ++static void decode_br_id_sigen(__u32 a, aufs_bindex_t *br_id, ++ aufs_bindex_t *sigen) ++{ ++ BUILD_BUG_ON((sizeof(*br_id) + sizeof(*sigen)) > sizeof(a)); ++ *br_id = a >> 16; ++ DEBUG_ON(*br_id < 0); ++ *sigen = a; ++ DEBUG_ON(*sigen < 0); ++} ++ ++static __u32 encode_br_id_sigen(aufs_bindex_t br_id, aufs_bindex_t sigen) ++{ ++ DEBUG_ON(br_id < 0 || sigen < 0); ++ return (br_id << 16) | sigen; ++} ++ ++/* NFS file handle */ ++enum { ++ /* support 64bit inode number */ ++ /* but untested */ ++ Fh_br_id_sigen, ++ Fh_ino1, ++#if BITS_PER_LONG == 64 ++ Fh_ino2, ++#endif ++ Fh_dir_ino1, ++#if BITS_PER_LONG == 64 ++ Fh_dir_ino2, ++#endif ++ Fh_h_ino1, ++#if BITS_PER_LONG == 64 ++ Fh_h_ino2, ++#endif ++ Fh_h_igen, ++ Fh_h_type, ++ Fh_tail, ++ ++ Fh_ino = Fh_ino1, ++ Fh_dir_ino = Fh_dir_ino1, ++ Fh_h_ino = Fh_h_ino1, ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++static struct dentry *decode_by_ino(struct super_block *sb, ino_t ino, ++ ino_t dir_ino) ++{ ++ struct dentry *dentry; ++ struct inode *inode; ++ ++ LKTRTrace("i%lu, diri%lu\n", ino, dir_ino); ++ ++ dentry = NULL; ++ inode = ilookup(sb, ino); ++ if (unlikely(!inode)) ++ goto out; ++ ++ dentry = ERR_PTR(-ESTALE); ++ if (unlikely(is_bad_inode(inode))) ++ goto out_iput; ++ ++ dentry = NULL; ++ if (!S_ISDIR(inode->i_mode)) { ++ struct dentry *d; ++ spin_lock(&dcache_lock); ++ list_for_each_entry(d, &inode->i_dentry, d_alias) ++ if (!is_anon(d) ++ && d->d_parent->d_inode->i_ino == dir_ino) { ++ dentry = dget_locked(d); ++ break; ++ } ++ spin_unlock(&dcache_lock); ++ } else { ++ dentry = d_find_alias(inode); ++ if (dentry ++ && !is_anon(dentry) ++ && dentry->d_parent->d_inode->i_ino == dir_ino) ++ goto out_iput; /* success */ ++ ++ dput(dentry); ++ dentry = NULL; ++ } ++ ++ out_iput: ++ iput(inode); ++ out: ++ TraceErrPtr(dentry); ++ return dentry; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct find_name_by_ino { ++ int called, found; ++ ino_t ino; ++ char *name; ++ int namelen; ++}; ++ ++static int ++find_name_by_ino(void *arg, const char *name, int namelen, loff_t offset, ++ filldir_ino_t ino, unsigned int d_type) ++{ ++ struct find_name_by_ino *a = arg; ++ ++ a->called++; ++ if (a->ino != ino) ++ return 0; ++ ++ memcpy(a->name, name, namelen); ++ a->namelen = namelen; ++ a->found = 1; ++ return 1; ++} ++ ++static struct dentry *decode_by_dir_ino(struct super_block *sb, ino_t ino, ++ ino_t dir_ino) ++{ ++ struct dentry *dentry, *parent; ++ struct inode *dir; ++ struct find_name_by_ino arg; ++ struct file *file; ++ int err; ++ ++ LKTRTrace("i%lu, diri%lu\n", ino, dir_ino); ++ ++ dentry = NULL; ++ dir = ilookup(sb, dir_ino); ++ if (unlikely(!dir)) ++ goto out; ++ ++ dentry = ERR_PTR(-ESTALE); ++ if (unlikely(is_bad_inode(dir))) ++ goto out_iput; ++ ++ dentry = NULL; ++ parent = d_find_alias(dir); ++ if (parent) { ++ if (unlikely(is_anon(parent))) { ++ dput(parent); ++ goto out_iput; ++ } ++ } else ++ goto out_iput; ++ ++ file = 
dentry_open(parent, NULL, au_dir_roflags); ++ dentry = (void*)file; ++ if (IS_ERR(file)) ++ goto out_iput; ++ ++ dentry = ERR_PTR(-ENOMEM); ++ arg.name = __getname(); ++ if (unlikely(!arg.name)) ++ goto out_fput; ++ arg.ino = ino; ++ arg.found = 0; ++ ++ do { ++ arg.called = 0; ++ //smp_mb(); ++ err = vfsub_readdir(file, find_name_by_ino, &arg, /*dlgt*/0); ++ } while (!err && !arg.found && arg.called); ++ dentry = ERR_PTR(err); ++ if (arg.found) { ++ /* do not call lkup_one(), nor dlgt */ ++ i_lock(dir); ++ dentry = lookup_one_len(arg.name, parent, arg.namelen); ++ i_unlock(dir); ++ TraceErrPtr(dentry); ++ } ++ ++ //out_putname: ++ __putname(arg.name); ++ out_fput: ++ fput(file); ++ out_iput: ++ iput(dir); ++ out: ++ TraceErrPtr(dentry); ++ return dentry; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct append_name { ++ int found, called, len; ++ char *h_path; ++ ino_t h_ino; ++}; ++ ++static int append_name(void *arg, const char *name, int len, loff_t pos, ++ filldir_ino_t ino, unsigned int d_type) ++{ ++ struct append_name *a = arg; ++ char *p; ++ ++ a->called++; ++ if (ino != a->h_ino) ++ return 0; ++ ++ DEBUG_ON(len == 1 && *name == '.'); ++ DEBUG_ON(len == 2 && name[0] == '.' && name[1] == '.'); ++ a->len = strlen(a->h_path); ++ memmove(a->h_path - a->len - 1, a->h_path, a->len); ++ a->h_path -= a->len + 1; ++ p = a->h_path + a->len; ++ *p++ = '/'; ++ memcpy(p, name, a->len); ++ a->len += 1 + len; ++ a->found++; ++ return 1; ++} ++ ++static int h_acceptable(void *expv, struct dentry *dentry) ++{ ++ return 1; ++} ++ ++static struct dentry* ++decode_by_path(struct super_block *sb, aufs_bindex_t bindex, __u32 *fh, ++ int fh_len, void *context) ++{ ++ struct dentry *dentry, *h_parent, *root, *h_root; ++ struct super_block *h_sb; ++ char *path, *p; ++ struct vfsmount *h_mnt; ++ struct append_name arg; ++ int len, err; ++ struct file *h_file; ++ struct nameidata nd; ++ struct aufs_branch *br; ++ ++ LKTRTrace("b%d\n", bindex); ++ SiMustAnyLock(sb); ++ ++ br = stobr(sb, bindex); ++ //br_get(br); ++ h_mnt = br->br_mnt; ++ h_sb = h_mnt->mnt_sb; ++ LKTRTrace("%s, h_decode_fh\n", au_sbtype(h_sb)); ++ h_parent = CALL(h_sb->s_export_op, decode_fh) ++ (h_sb, fh + Fh_tail, fh_len - Fh_tail, fh[Fh_h_type], ++ h_acceptable, /*context*/NULL); ++ dentry = h_parent; ++ if (unlikely(!h_parent || IS_ERR(h_parent))) { ++ Warn1("%s decode_fh failed\n", au_sbtype(h_sb)); ++ goto out; ++ } ++ dentry = NULL; ++ if (unlikely(is_anon(h_parent))) { ++ Warn1("%s decode_fh returned a disconnected dentry\n", ++ au_sbtype(h_sb)); ++ dput(h_parent); ++ goto out; ++ } ++ ++ dentry = ERR_PTR(-ENOMEM); ++ path = __getname(); ++ if (unlikely(!path)) { ++ dput(h_parent); ++ goto out; ++ } ++ ++ root = sb->s_root; ++ di_read_lock_parent(root, !AUFS_I_RLOCK); ++ h_root = au_h_dptr_i(root, bindex); ++ di_read_unlock(root, !AUFS_I_RLOCK); ++ arg.h_path = d_path(h_root, h_mnt, path, PATH_MAX); ++ dentry = (void*)arg.h_path; ++ if (unlikely(!arg.h_path || IS_ERR(arg.h_path))) ++ goto out_putname; ++ len = strlen(arg.h_path); ++ arg.h_path = d_path(h_parent, h_mnt, path, PATH_MAX); ++ dentry = (void*)arg.h_path; ++ if (unlikely(!arg.h_path || IS_ERR(arg.h_path))) ++ goto out_putname; ++ LKTRTrace("%s\n", arg.h_path); ++ if (len != 1) ++ arg.h_path += len; ++ LKTRTrace("%s\n", arg.h_path); ++ ++ /* cf. 
fs/exportfs/expfs.c */ ++ h_file = dentry_open(h_parent, NULL, au_dir_roflags); ++ dentry = (void*)h_file; ++ if (IS_ERR(h_file)) ++ goto out_putname; ++ ++ arg.found = 0; ++ arg.h_ino = decode_ino(fh + Fh_h_ino); ++ do { ++ arg.called = 0; ++ err = vfsub_readdir(h_file, append_name, &arg, /*dlgt*/0); ++ } while (!err && !arg.found && arg.called); ++ LKTRTrace("%s, %d\n", arg.h_path, arg.len); ++ ++ p = d_path(root, stosi(sb)->si_mnt, path, PATH_MAX - arg.len - 2); ++ dentry = (void*)p; ++ if (unlikely(!p || IS_ERR(p))) ++ goto out_fput; ++ p[strlen(p)] = '/'; ++ LKTRTrace("%s\n", p); ++ ++ err = path_lookup(p, LOOKUP_FOLLOW, &nd); ++ dentry = ERR_PTR(err); ++ if (!err) { ++ dentry = dget(nd.dentry); ++ if (unlikely(is_anon(dentry))) { ++ dput(dentry); ++ dentry = ERR_PTR(-ESTALE); ++ } ++ path_release(&nd); ++ } ++ ++ out_fput: ++ fput(h_file); ++ out_putname: ++ __putname(path); ++ out: ++ //br_put(br); ++ TraceErrPtr(dentry); ++ return dentry; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static struct dentry* ++aufs_decode_fh(struct super_block *sb, __u32 *fh, int fh_len, int fh_type, ++ int (*acceptable)(void *context, struct dentry *de), ++ void *context) ++{ ++ struct dentry *dentry; ++ ino_t ino, dir_ino; ++ aufs_bindex_t bindex, br_id, sigen_v; ++ struct inode *inode, *h_inode; ++ ++ //au_debug_on(); ++ LKTRTrace("%d, fh{i%u, br_id_sigen 0x%x, hi%u}\n", ++ fh_type, fh[Fh_ino], fh[Fh_br_id_sigen], fh[Fh_h_ino]); ++ DEBUG_ON(fh_len < Fh_tail); ++ ++ si_read_lock(sb); ++ lockdep_off(); ++ ++ /* branch id may be wrapped around */ ++ dentry = ERR_PTR(-ESTALE); ++ decode_br_id_sigen(fh[Fh_br_id_sigen], &br_id, &sigen_v); ++ bindex = find_brindex(sb, br_id); ++ if (unlikely(bindex < 0 || au_sigen(sb) < sigen_v)) ++ goto out; ++ ++ /* is this inode still cached? */ ++ ino = decode_ino(fh + Fh_ino); ++ dir_ino = decode_ino(fh + Fh_dir_ino); ++ dentry = decode_by_ino(sb, ino, dir_ino); ++ if (IS_ERR(dentry)) ++ goto out; ++ if (dentry) ++ goto accept; ++ ++ /* is the parent dir cached? 
*/ ++ dentry = decode_by_dir_ino(sb, ino, dir_ino); ++ if (IS_ERR(dentry)) ++ goto out; ++ if (dentry) ++ goto accept; ++ ++ /* lookup path */ ++ dentry = decode_by_path(sb, bindex, fh, fh_len, context); ++ if (IS_ERR(dentry)) ++ goto out; ++ if (unlikely(!dentry)) ++ goto out_stale; ++ if (unlikely(dentry->d_inode->i_ino != ino)) ++ goto out_dput; ++ ++ accept: ++ inode = dentry->d_inode; ++ h_inode = NULL; ++ ii_read_lock_child(inode); ++ if (ibstart(inode) <= bindex && bindex <= ibend(inode)) ++ h_inode = au_h_iptr_i(inode, bindex); ++ ii_read_unlock(inode); ++ if (h_inode ++ && h_inode->i_generation == fh[Fh_h_igen] ++ && acceptable(context, dentry)) ++ goto out; /* success */ ++ out_dput: ++ dput(dentry); ++ out_stale: ++ dentry = ERR_PTR(-ESTALE); ++ out: ++ lockdep_on(); ++ si_read_unlock(sb); ++ TraceErrPtr(dentry); ++ //au_debug_off(); ++ return dentry; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int aufs_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len, ++ int connectable) ++{ ++ int err; ++ struct super_block *sb, *h_sb; ++ struct inode *inode, *h_inode, *dir; ++ aufs_bindex_t bindex; ++ union conv u; ++ struct dentry *parent, *h_parent; ++ ++ //au_debug_on(); ++ BUILD_BUG_ON(sizeof(u.ino) != sizeof(u.a)); ++ LKTRTrace("%.*s, max %d, conn %d\n", ++ DLNPair(dentry), *max_len, connectable); ++ DEBUG_ON(is_anon(dentry)); ++ inode = dentry->d_inode; ++ DEBUG_ON(!inode); ++ parent = dentry->d_parent; ++ DEBUG_ON(is_anon(parent)); ++ ++ err = -ENOSPC; ++ if (unlikely(*max_len <= Fh_tail)) { ++ Warn1("NFSv2 client (max_len %d)?\n", *max_len); ++ goto out; ++ } ++ ++ sb = dentry->d_sb; ++ si_read_lock(sb); ++ di_read_lock_child(dentry, AUFS_I_RLOCK); ++ di_read_lock_parent(parent, AUFS_I_RLOCK); ++#ifdef CONFIG_AUFS_DEBUG ++ if (unlikely(!au_flag_test(sb, AuFlag_XINO))) ++ Warn1("NFS-exporting requires xino\n"); ++#if 0 ++ if (unlikely(au_flag_test(sb, AuFlag_UDBA_INOTIFY))) ++ Warn1("udba=inotify is not recommended when exporting\n"); ++#endif ++#endif ++ ++ err = -EPERM; ++ bindex = ibstart(inode); ++ h_sb = sbr_sb(sb, bindex); ++ if (unlikely(!h_sb->s_export_op)) { ++ Err1("%s branch is not exportable\n", au_sbtype(h_sb)); ++ goto out_unlock; ++ } ++ ++#if 0 //def CONFIG_AUFS_ROBR ++ if (unlikely(SB_AUFS(h_sb))) { ++ Err1("aufs branch is not supported\n"); ++ goto out_unlock; ++ } ++#endif ++ ++ /* doesn't support pseudo-link */ ++ if (unlikely(bindex < dbstart(dentry) ++ || dbend(dentry) < bindex ++ || !au_h_dptr_i(dentry, bindex))) { ++ Err("%.*s/%.*s, b%d, pseudo-link?\n", ++ DLNPair(dentry->d_parent), DLNPair(dentry), bindex); ++ goto out_unlock; ++ } ++ ++ fh[Fh_br_id_sigen] = encode_br_id_sigen(sbr_id(sb, bindex), ++ au_sigen(sb)); ++ encode_ino(fh + Fh_ino, inode->i_ino); ++ dir = parent->d_inode; ++ encode_ino(fh + Fh_dir_ino, dir->i_ino); ++ h_inode = au_h_iptr(inode); ++ encode_ino(fh + Fh_h_ino, h_inode->i_ino); ++ fh[Fh_h_igen] = h_inode->i_generation; ++ ++ /* it should be set at exporting time */ ++ if (unlikely(!h_sb->s_export_op->find_exported_dentry)) { ++ Warn("set default find_exported_dentry for %s\n", ++ au_sbtype(h_sb)); ++ h_sb->s_export_op->find_exported_dentry = find_exported_dentry; ++ } ++ ++ *max_len -= Fh_tail; ++ //LKTRTrace("Fh_tail %d, max_len %d\n", Fh_tail, *max_len); ++ h_parent = au_h_dptr_i(parent, bindex); ++ DEBUG_ON(is_anon(h_parent)); ++ err = fh[Fh_h_type] = CALL(h_sb->s_export_op, encode_fh) ++ (h_parent, fh + Fh_tail, max_len, connectable); ++ *max_len += Fh_tail; ++ if (err != 
255) ++ err = 2; //?? ++ else ++ Warn1("%s encode_fh failed\n", au_sbtype(h_sb)); ++ ++ out_unlock: ++ di_read_unlock(parent, AUFS_I_RLOCK); ++ aufs_read_unlock(dentry, AUFS_I_RLOCK); ++ out: ++ TraceErr(err); ++ //au_debug_off(); ++ if (unlikely(err < 0)) ++ err = 255; ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++#if 0 ++struct export_operations { ++ struct dentry *(*decode_fh)(struct super_block *sb, __u32 *fh, int fh_len, int fh_type, ++ int (*acceptable)(void *context, struct dentry *de), ++ void *context); ++ int (*encode_fh)(struct dentry *de, __u32 *fh, int *max_len, ++ int connectable); ++ ++ /* the following are only called from the filesystem itself */ ++ int (*get_name)(struct dentry *parent, char *name, ++ struct dentry *child); ++ struct dentry * (*get_parent)(struct dentry *child); ++ struct dentry * (*get_dentry)(struct super_block *sb, void *inump); ++ ++ /* This is set by the exporting module to a standard helper */ ++ struct dentry * (*find_exported_dentry)( ++ struct super_block *sb, void *obj, void *parent, ++ int (*acceptable)(void *context, struct dentry *de), ++ void *context); ++}; ++#endif ++ ++struct export_operations aufs_export_op = { ++ .decode_fh = aufs_decode_fh, ++ .encode_fh = aufs_encode_fh ++}; +diff --git a/fs/aufs/f_op.c b/fs/aufs/f_op.c +new file mode 100755 +index 0000000..3cd1081 +--- /dev/null ++++ b/fs/aufs/f_op.c +@@ -0,0 +1,684 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: f_op.c,v 1.27 2007/05/14 03:38:24 sfjro Exp $ */ ++ ++#include <linux/fsnotify.h> ++#include <linux/pagemap.h> ++#include <linux/poll.h> ++#include <linux/security.h> ++#include <linux/version.h> ++#include "aufs.h" ++ ++/* common function to regular file and dir */ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,18) ++#define FlushArgs hidden_file, id ++int aufs_flush(struct file *file, fl_owner_t id) ++#else ++#define FlushArgs hidden_file ++int aufs_flush(struct file *file) ++#endif ++{ ++ int err; ++ struct dentry *dentry; ++ aufs_bindex_t bindex, bend; ++ ++ dentry = file->f_dentry; ++ LKTRTrace("%.*s\n", DLNPair(dentry)); ++ ++ // aufs_read_lock_file() ++ si_read_lock(dentry->d_sb); ++ fi_read_lock(file); ++ di_read_lock_child(dentry, !AUFS_I_RLOCK); ++ ++ err = 0; ++ bend = fbend(file); ++ for (bindex = fbstart(file); !err && bindex <= bend; bindex++) { ++ struct file *hidden_file; ++ hidden_file = au_h_fptr_i(file, bindex); ++ if (hidden_file && hidden_file->f_op ++ && hidden_file->f_op->flush) ++ err = hidden_file->f_op->flush(FlushArgs); ++ } ++ ++ di_read_unlock(dentry, !AUFS_I_RLOCK); ++ fi_read_unlock(file); ++ si_read_unlock(dentry->d_sb); ++ TraceErr(err); ++ return err; ++} ++#undef FlushArgs ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int do_open_nondir(struct file *file, int flags) ++{ ++ int err; ++ aufs_bindex_t bindex; ++ struct super_block *sb; ++ struct file *hidden_file; ++ struct dentry *dentry; ++ struct inode *inode; ++ struct aufs_finfo *finfo; ++ ++ dentry = file->f_dentry; ++ LKTRTrace("%.*s, flags 0%o\n", DLNPair(dentry), flags); ++ FiMustWriteLock(file); ++ inode = dentry->d_inode; ++ DEBUG_ON(!inode || S_ISDIR(inode->i_mode)); ++ ++ err = 0; ++ finfo = ftofi(file); ++ finfo->fi_h_vm_ops = NULL; ++ sb = dentry->d_sb; ++ bindex = dbstart(dentry); ++ DEBUG_ON(!au_h_dptr(dentry)->d_inode); ++ /* O_TRUNC is processed already */ ++ BUG_ON(test_ro(sb, bindex, inode) && (flags & O_TRUNC)); ++ ++ hidden_file = hidden_open(dentry, bindex, flags); ++ //if (LktrCond) {fput(hidden_file); br_put(stobr(dentry->d_sb, bindex)); ++ //hidden_file = ERR_PTR(-1);} ++ if (!IS_ERR(hidden_file)) { ++ set_fbstart(file, bindex); ++ set_fbend(file, bindex); ++ set_h_fptr(file, bindex, hidden_file); ++ return 0; /* success */ ++ } ++ err = PTR_ERR(hidden_file); ++ TraceErr(err); ++ return err; ++} ++ ++static int aufs_open_nondir(struct inode *inode, struct file *file) ++{ ++ return au_do_open(inode, file, do_open_nondir); ++} ++ ++static int aufs_release_nondir(struct inode *inode, struct file *file) ++{ ++ struct super_block *sb = file->f_dentry->d_sb; ++ ++ LKTRTrace("i%lu, %.*s\n", inode->i_ino, DLNPair(file->f_dentry)); ++ ++ si_read_lock(sb); ++ au_fin_finfo(file); ++ si_read_unlock(sb); ++ return 0; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static ssize_t aufs_read(struct file *file, char __user *buf, size_t count, ++ loff_t *ppos) ++{ ++ ssize_t err; ++ struct dentry *dentry; ++ struct file *hidden_file; ++ struct super_block *sb; ++ struct inode *h_inode; ++ int dlgt; ++ ++ dentry = file->f_dentry; ++ LKTRTrace("%.*s, cnt %lu, pos %Ld\n", ++ DLNPair(dentry), (unsigned long)count, *ppos); ++ ++ sb = dentry->d_sb; ++ si_read_lock(sb); ++ err = 
au_reval_and_lock_finfo(file, au_reopen_nondir, /*wlock*/0, ++ /*locked*/0); ++ //if (LktrCond) {fi_read_unlock(file); err = -1;} ++ if (unlikely(err)) ++ goto out; ++ ++ /* support LSM and notify */ ++ dlgt = need_dlgt(sb); ++ hidden_file = au_h_fptr(file); ++ h_inode = hidden_file->f_dentry->d_inode; ++ if (!au_flag_test(sb, AuFlag_UDBA_INOTIFY)) ++ err = vfsub_read_u(hidden_file, buf, count, ppos, dlgt); ++ else { ++ struct inode *dir = dentry->d_parent->d_inode, ++ *h_dir = hidden_file->f_dentry->d_parent->d_inode; ++ aufs_bindex_t bstart = fbstart(file); ++ hdir_lock(h_dir, dir, bstart); ++ err = vfsub_read_u(hidden_file, buf, count, ppos, dlgt); ++ hdir_unlock(h_dir, dir, bstart); ++ } ++ memcpy(&file->f_ra, &hidden_file->f_ra, sizeof(file->f_ra)); //?? ++ dentry->d_inode->i_atime = hidden_file->f_dentry->d_inode->i_atime; ++ ++ fi_read_unlock(file); ++ out: ++ si_read_unlock(sb); ++ TraceErr(err); ++ return err; ++} ++ ++static ssize_t aufs_write(struct file *file, const char __user *__buf, ++ size_t count, loff_t *ppos) ++{ ++ ssize_t err; ++ struct dentry *dentry; ++ struct inode *inode; ++ struct super_block *sb; ++ struct file *hidden_file; ++ char __user *buf = (char __user*)__buf; ++ struct inode *h_inode; ++ int dlgt; ++ ++ dentry = file->f_dentry; ++ LKTRTrace("%.*s, cnt %lu, pos %Ld\n", ++ DLNPair(dentry), (unsigned long)count, *ppos); ++ ++ inode = dentry->d_inode; ++ i_lock(inode); ++ sb = dentry->d_sb; ++ si_read_lock(sb); ++ err = au_reval_and_lock_finfo(file, au_reopen_nondir, /*wlock*/1, ++ /*locked*/1); ++ //if (LktrCond) {fi_write_unlock(file); err = -1;} ++ if (unlikely(err)) ++ goto out; ++ err = au_ready_to_write(file, -1); ++ //if (LktrCond) err = -1; ++ if (unlikely(err)) ++ goto out_unlock; ++ ++ /* support LSM and notify */ ++ dlgt = need_dlgt(sb); ++ hidden_file = au_h_fptr(file); ++ h_inode = hidden_file->f_dentry->d_inode; ++ if (!au_flag_test(sb, AuFlag_UDBA_INOTIFY)) ++ err = vfsub_write_u(hidden_file, buf, count, ppos, dlgt); ++ else { ++ struct inode *dir = dentry->d_parent->d_inode, ++ *h_dir = hidden_file->f_dentry->d_parent->d_inode; ++ aufs_bindex_t bstart = fbstart(file); ++ hdir_lock(h_dir, dir, bstart); ++ err = vfsub_write_u(hidden_file, buf, count, ppos, dlgt); ++ hdir_unlock(h_dir, dir, bstart); ++ } ++ ii_write_lock_child(inode); ++ au_cpup_attr_timesizes(inode); ++ ii_write_unlock(inode); ++ ++ out_unlock: ++ fi_write_unlock(file); ++ out: ++ si_read_unlock(sb); ++ i_unlock(inode); ++ TraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++#if 0 //def CONFIG_AUFS_ROBR ++struct lvma { ++ struct list_head list; ++ struct vm_area_struct *vma; ++}; ++ ++static struct file *safe_file(struct vm_area_struct *vma) ++{ ++ struct file *file = vma->vm_file; ++ struct super_block *sb = file->f_dentry->d_sb; ++ struct lvma *lvma, *entry; ++ struct aufs_sbinfo *sbinfo; ++ int found, warn; ++ ++ TraceEnter(); ++ DEBUG_ON(!SB_AUFS(sb)); ++ ++ warn = 0; ++ found = 0; ++ sbinfo = stosi(sb); ++ spin_lock(&sbinfo->si_lvma_lock); ++ list_for_each_entry(entry, &sbinfo->si_lvma, list) { ++ found = (entry->vma == vma); ++ if (unlikely(found)) ++ break; ++ } ++ if (!found) { ++ lvma = kmalloc(sizeof(*lvma), GFP_ATOMIC); ++ if (lvma) { ++ lvma->vma = vma; ++ list_add(&lvma->list, &sbinfo->si_lvma); ++ } else { ++ warn = 1; ++ file = NULL; ++ } ++ } else ++ file = NULL; ++ spin_unlock(&sbinfo->si_lvma_lock); ++ ++ if (unlikely(warn)) ++ Warn1("no memory for lvma\n"); ++ return file; ++} ++ ++static void 
reset_file(struct vm_area_struct *vma, struct file *file) ++{ ++ struct super_block *sb = file->f_dentry->d_sb; ++ struct lvma *entry, *found; ++ struct aufs_sbinfo *sbinfo; ++ ++ TraceEnter(); ++ DEBUG_ON(!SB_AUFS(sb)); ++ ++ vma->vm_file = file; ++ ++ found = NULL; ++ sbinfo = stosi(sb); ++ spin_lock(&sbinfo->si_lvma_lock); ++ list_for_each_entry(entry, &sbinfo->si_lvma, list) ++ if (entry->vma == vma){ ++ found = entry; ++ break; ++ } ++ DEBUG_ON(!found); ++ list_del(&found->list); ++ spin_unlock(&sbinfo->si_lvma_lock); ++ kfree(found); ++} ++ ++#else ++ ++static struct file *safe_file(struct vm_area_struct *vma) ++{ ++ struct file *file; ++ ++ file = vma->vm_file; ++ if (file->private_data && au_is_aufs(file->f_dentry->d_sb)) ++ return file; ++ return NULL; ++} ++ ++static void reset_file(struct vm_area_struct *vma, struct file *file) ++{ ++ vma->vm_file = file; ++ smp_mb(); ++} ++#endif /* CONFIG_AUFS_ROBR */ ++ ++static struct page *aufs_nopage(struct vm_area_struct *vma, unsigned long addr, ++ int *type) ++{ ++ struct page *page; ++ struct dentry *dentry; ++ struct file *file, *hidden_file; ++ struct inode *inode; ++ static DECLARE_WAIT_QUEUE_HEAD(wq); ++ struct aufs_finfo *finfo; ++ ++ TraceEnter(); ++ DEBUG_ON(!vma || !vma->vm_file); ++ wait_event(wq, (file = safe_file(vma))); ++ DEBUG_ON(!au_is_aufs(file->f_dentry->d_sb)); ++ dentry = file->f_dentry; ++ LKTRTrace("%.*s, addr %lx\n", DLNPair(dentry), addr); ++ inode = dentry->d_inode; ++ DEBUG_ON(!S_ISREG(inode->i_mode)); ++ ++ // do not revalidate, nor lock ++ finfo = ftofi(file); ++ hidden_file = finfo->fi_hfile[0 + finfo->fi_bstart].hf_file; ++ DEBUG_ON(!hidden_file || !au_is_mmapped(file)); ++ vma->vm_file = hidden_file; ++ //smp_mb(); ++ page = finfo->fi_h_vm_ops->nopage(vma, addr, type); ++ reset_file(vma, file); ++#if 0 //def CONFIG_SMP ++ //wake_up_nr(&wq, online_cpu - 1); ++ wake_up_all(&wq); ++#else ++ wake_up(&wq); ++#endif ++ if (!IS_ERR(page)) { ++ //page->mapping = file->f_mapping; ++ //get_page(page); ++ //file->f_mapping = hidden_file->f_mapping; ++ //touch_atime(NULL, dentry); ++ //inode->i_atime = hidden_file->f_dentry->d_inode->i_atime; ++ } ++ TraceErrPtr(page); ++ return page; ++} ++ ++static int aufs_populate(struct vm_area_struct *vma, unsigned long addr, ++ unsigned long len, pgprot_t prot, unsigned long pgoff, ++ int nonblock) ++{ ++ Err("please report me this application\n"); ++ BUG(); ++ return ftofi(vma->vm_file)->fi_h_vm_ops->populate ++ (vma, addr, len, prot, pgoff, nonblock); ++} ++ ++static struct vm_operations_struct aufs_vm_ops = { ++ //.open = aufs_vmaopen, ++ //.close = aufs_vmaclose, ++ .nopage = aufs_nopage, ++ .populate = aufs_populate, ++ //page_mkwrite(struct vm_area_struct *vma, struct page *page) ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int aufs_mmap(struct file *file, struct vm_area_struct *vma) ++{ ++ int err, wlock, mmapped; ++ struct dentry *dentry; ++ struct super_block *sb; ++ struct file *h_file; ++ struct vm_operations_struct *vm_ops; ++ unsigned long flags; ++ ++ dentry = file->f_dentry; ++ LKTRTrace("%.*s, %lx, len %lu\n", ++ DLNPair(dentry), vma->vm_start, vma->vm_end - vma->vm_start); ++ DEBUG_ON(!S_ISREG(dentry->d_inode->i_mode)); ++ DEBUG_ON(down_write_trylock(&vma->vm_mm->mmap_sem)); ++ ++ mmapped = au_is_mmapped(file); ++ wlock = 0; ++ if (file->f_mode & FMODE_WRITE) { ++ flags = VM_SHARED | VM_WRITE; ++ wlock = ((flags & vma->vm_flags) == flags); ++ } ++ ++ sb = dentry->d_sb; ++ si_read_lock(sb); ++ err = 
au_reval_and_lock_finfo(file, au_reopen_nondir, ++ wlock | !mmapped, /*locked*/0); ++ //err = -1; ++ if (unlikely(err)) ++ goto out; ++ ++ if (wlock) { ++ err = au_ready_to_write(file, -1); ++ //err = -1; ++ if (unlikely(err)) ++ goto out_unlock; ++ } ++ ++ h_file = au_h_fptr(file); ++ vm_ops = ftofi(file)->fi_h_vm_ops; ++ if (unlikely(!mmapped)) { ++ // nfs uses some locks ++ lockdep_off(); ++ err = h_file->f_op->mmap(h_file, vma); ++ lockdep_on(); ++ if (unlikely(err)) ++ goto out_unlock; ++ vm_ops = vma->vm_ops; ++ DEBUG_ON(!vm_ops); ++ err = do_munmap(current->mm, vma->vm_start, ++ vma->vm_end - vma->vm_start); ++ if (unlikely(err)) { ++ IOErr("failed internal unmapping %.*s, %d\n", ++ DLNPair(h_file->f_dentry), err); ++ err = -EIO; ++ goto out_unlock; ++ } ++ } ++ DEBUG_ON(!vm_ops); ++ ++ err = generic_file_mmap(file, vma); ++ if (!err) { ++ file_accessed(h_file); ++ dentry->d_inode->i_atime = h_file->f_dentry->d_inode->i_atime; ++ vma->vm_ops = &aufs_vm_ops; ++ if (unlikely(!mmapped)) ++ ftofi(file)->fi_h_vm_ops = vm_ops; ++ } ++ ++ out_unlock: ++ if (!wlock && mmapped) ++ fi_read_unlock(file); ++ else ++ fi_write_unlock(file); ++ out: ++ si_read_unlock(sb); ++ TraceErr(err); ++ return err; ++} ++ ++// todo: try do_sendfile() in fs/read_write.c ++static ssize_t aufs_sendfile(struct file *file, loff_t *ppos, ++ size_t count, read_actor_t actor, void *target) ++{ ++ ssize_t err; ++ struct file *h_file; ++ const char c = current->comm[4]; ++ /* true if a kernel thread named 'loop[0-9].*' accesses a file */ ++ const int loopback = (current->mm == NULL ++ && '0' <= c && c <= '9' ++ && strncmp(current->comm, "loop", 4) == 0); ++ struct dentry *dentry; ++ struct super_block *sb; ++ ++ dentry = file->f_dentry; ++ LKTRTrace("%.*s, pos %Ld, cnt %lu, loopback %d\n", ++ DLNPair(dentry), *ppos, (unsigned long)count, loopback); ++ ++ sb = dentry->d_sb; ++ si_read_lock(sb); ++ err = au_reval_and_lock_finfo(file, au_reopen_nondir, /*wlock*/0, ++ /*locked*/0); ++ if (unlikely(err)) ++ goto out; ++ ++ err = -EINVAL; ++ h_file = au_h_fptr(file); ++ if (h_file->f_op && h_file->f_op->sendfile) { ++ if (/* unlikely */(loopback)) { ++ file->f_mapping = h_file->f_mapping; ++ smp_mb(); //?? ++ } ++ // nfs uses some locks ++ lockdep_off(); ++ err = h_file->f_op->sendfile ++ (h_file, ppos, count, actor, target); ++ lockdep_on(); ++ dentry->d_inode->i_atime = h_file->f_dentry->d_inode->i_atime; ++ } ++ fi_read_unlock(file); ++ ++ out: ++ si_read_unlock(sb); ++ TraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* copied from linux/fs/select.h, must match */ ++#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM) ++ ++static unsigned int aufs_poll(struct file *file, poll_table *wait) ++{ ++ unsigned int mask; ++ struct file *hidden_file; ++ int err; ++ struct dentry *dentry; ++ struct super_block *sb; ++ ++ dentry = file->f_dentry; ++ LKTRTrace("%.*s, wait %p\n", DLNPair(dentry), wait); ++ DEBUG_ON(S_ISDIR(dentry->d_inode->i_mode)); ++ ++ /* We should pretend an error happend. 
*/ ++ mask = POLLERR /* | POLLIN | POLLOUT */; ++ sb = dentry->d_sb; ++ si_read_lock(sb); ++ err = au_reval_and_lock_finfo(file, au_reopen_nondir, /*wlock*/0, ++ /*locked*/0); ++ //err = -1; ++ if (unlikely(err)) ++ goto out; ++ ++ /* it is not an error of hidden_file has no operation */ ++ mask = DEFAULT_POLLMASK; ++ hidden_file = au_h_fptr(file); ++ if (hidden_file->f_op && hidden_file->f_op->poll) ++ mask = hidden_file->f_op->poll(hidden_file, wait); ++ fi_read_unlock(file); ++ ++ out: ++ si_read_unlock(sb); ++ TraceErr((int)mask); ++ return mask; ++} ++ ++static int aufs_fsync_nondir(struct file *file, struct dentry *dentry, ++ int datasync) ++{ ++ int err, my_lock; ++ struct inode *inode; ++ struct file *hidden_file; ++ struct super_block *sb; ++ ++ LKTRTrace("%.*s, %d\n", DLNPair(dentry), datasync); ++ inode = dentry->d_inode; ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,17) ++ IMustLock(inode); ++ my_lock = 0; ++#else ++ /* before 2.6.17, ++ * msync(2) calls me without locking i_sem/i_mutex, but fsync(2). ++ */ ++ my_lock = !i_trylock(inode); ++#endif ++ ++ sb = dentry->d_sb; ++ si_read_lock(sb); ++ err = 0; //-EBADF; // posix? ++ if (unlikely(!(file->f_mode & FMODE_WRITE))) ++ goto out; ++ err = au_reval_and_lock_finfo(file, au_reopen_nondir, /*wlock*/1, ++ /*locked*/1); ++ //err = -1; ++ if (unlikely(err)) ++ goto out; ++ err = au_ready_to_write(file, -1); ++ //err = -1; ++ if (unlikely(err)) ++ goto out_unlock; ++ ++ err = -EINVAL; ++ hidden_file = au_h_fptr(file); ++ if (hidden_file->f_op && hidden_file->f_op->fsync) { ++ // todo: apparmor thread? ++ //file->f_mapping->host->i_mutex ++ ii_write_lock_child(inode); ++ hi_lock_child(hidden_file->f_dentry->d_inode); ++ err = hidden_file->f_op->fsync ++ (hidden_file, hidden_file->f_dentry, datasync); ++ //err = -1; ++ au_cpup_attr_timesizes(inode); ++ i_unlock(hidden_file->f_dentry->d_inode); ++ ii_write_unlock(inode); ++ } ++ ++ out_unlock: ++ fi_write_unlock(file); ++ out: ++ if (unlikely(my_lock)) ++ i_unlock(inode); ++ si_read_unlock(sb); ++ TraceErr(err); ++ return err; ++} ++ ++static int aufs_fasync(int fd, struct file *file, int flag) ++{ ++ int err; ++ struct file *hidden_file; ++ struct dentry *dentry; ++ struct super_block *sb; ++ ++ dentry = file->f_dentry; ++ LKTRTrace("%.*s, %d\n", DLNPair(dentry), flag); ++ ++ sb = dentry->d_sb; ++ si_read_lock(sb); ++ err = au_reval_and_lock_finfo(file, au_reopen_nondir, /*wlock*/0, ++ /*locked*/0); ++ //err = -1; ++ if (unlikely(err)) ++ goto out; ++ ++ hidden_file = au_h_fptr(file); ++ if (hidden_file->f_op && hidden_file->f_op->fasync) ++ err = hidden_file->f_op->fasync(fd, hidden_file, flag); ++ fi_read_unlock(file); ++ ++ out: ++ si_read_unlock(sb); ++ TraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++#if 0 // comment ++struct file_operations { ++ struct module *owner; ++ loff_t (*llseek) (struct file *, loff_t, int); ++ ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); ++ ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t); ++ ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); ++ ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t, loff_t); ++ int (*readdir) (struct file *, void *, filldir_t); ++ unsigned int (*poll) (struct file *, struct poll_table_struct *); ++ int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long); ++ long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); ++ long (*compat_ioctl) (struct 
file *, unsigned int, unsigned long); ++ int (*mmap) (struct file *, struct vm_area_struct *); ++ int (*open) (struct inode *, struct file *); ++ int (*flush) (struct file *); ++ int (*release) (struct inode *, struct file *); ++ int (*fsync) (struct file *, struct dentry *, int datasync); ++ int (*aio_fsync) (struct kiocb *, int datasync); ++ int (*fasync) (int, struct file *, int); ++ int (*lock) (struct file *, int, struct file_lock *); ++ ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, loff_t *); ++ ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, loff_t *); ++ ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, void *); ++ ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); ++ unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); ++ int (*check_flags)(int); ++ int (*dir_notify)(struct file *file, unsigned long arg); ++ int (*flock) (struct file *, int, struct file_lock *); ++}; ++#endif ++ ++struct file_operations aufs_file_fop = { ++ .read = aufs_read, ++ .write = aufs_write, ++ .poll = aufs_poll, ++ .mmap = aufs_mmap, ++ .open = aufs_open_nondir, ++ .flush = aufs_flush, ++ .release = aufs_release_nondir, ++ .fsync = aufs_fsync_nondir, ++ .fasync = aufs_fasync, ++ .sendfile = aufs_sendfile, ++}; +diff --git a/fs/aufs/file.c b/fs/aufs/file.c +new file mode 100755 +index 0000000..857a4e8 +--- /dev/null ++++ b/fs/aufs/file.c +@@ -0,0 +1,832 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: file.c,v 1.42 2007/05/14 03:39:09 sfjro Exp $ */ ++ ++//#include <linux/fsnotify.h> ++#include <linux/pagemap.h> ++//#include <linux/poll.h> ++//#include <linux/security.h> ++#include "aufs.h" ++ ++/* drop flags for writing */ ++unsigned int au_file_roflags(unsigned int flags) ++{ ++ flags &= ~(O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC); ++ flags |= O_RDONLY | O_NOATIME; ++ return flags; ++} ++ ++/* common functions to regular file and dir */ ++struct file *hidden_open(struct dentry *dentry, aufs_bindex_t bindex, int flags) ++{ ++ struct dentry *hidden_dentry; ++ struct inode *hidden_inode; ++ struct super_block *sb; ++ struct vfsmount *hidden_mnt; ++ struct file *hidden_file; ++ struct aufs_branch *br; ++ loff_t old_size; ++ int udba; ++ ++ LKTRTrace("%.*s, b%d, flags 0%o\n", DLNPair(dentry), bindex, flags); ++ DEBUG_ON(!dentry); ++ hidden_dentry = au_h_dptr_i(dentry, bindex); ++ DEBUG_ON(!hidden_dentry); ++ hidden_inode = hidden_dentry->d_inode; ++ DEBUG_ON(!hidden_inode); ++ ++ sb = dentry->d_sb; ++ udba = au_flag_test(sb, AuFlag_UDBA_INOTIFY); ++ if (unlikely(udba)) { ++ // test here? 
++ } ++ ++ br = stobr(sb, bindex); ++ br_get(br); ++ /* drop flags for writing */ ++ if (test_ro(sb, bindex, dentry->d_inode)) ++ flags = au_file_roflags(flags); ++ flags &= ~O_CREAT; ++ spin_lock(&hidden_inode->i_lock); ++ old_size = i_size_read(hidden_inode); ++ spin_unlock(&hidden_inode->i_lock); ++ ++ //DbgSleep(3); ++ ++ dget(hidden_dentry); ++ hidden_mnt = mntget(br->br_mnt); ++ hidden_file = dentry_open(hidden_dentry, hidden_mnt, flags); ++ //if (LktrCond) {fput(hidden_file); hidden_file = ERR_PTR(-1);} ++ ++ if (!IS_ERR(hidden_file)) { ++#if 0 // remove this ++ if (/* old_size && */ (flags & O_TRUNC)) { ++ au_direval_dec(dentry); ++ if (!IS_ROOT(dentry)) ++ au_direval_dec(dentry->d_parent); ++ } ++#endif ++ return hidden_file; ++ } ++ ++ br_put(br); ++ TraceErrPtr(hidden_file); ++ return hidden_file; ++} ++ ++static int do_coo(struct dentry *dentry, aufs_bindex_t bstart) ++{ ++ int err; ++ struct dentry *parent, *h_parent, *h_dentry; ++ aufs_bindex_t bcpup; ++ struct inode *h_dir, *h_inode, *dir; ++ ++ LKTRTrace("%.*s\n", DLNPair(dentry)); ++ DEBUG_ON(IS_ROOT(dentry)); ++ DiMustWriteLock(dentry); ++ ++ parent = dentry->d_parent; // dget_parent() ++ di_write_lock_parent(parent); ++ bcpup = err = find_rw_parent_br(dentry, bstart); ++ //bcpup = err = find_rw_br(sb, bstart); ++ if (unlikely(err < 0)) { ++ err = 0; // stop copyup, it is not an error ++ goto out; ++ } ++ err = 0; ++ ++ h_parent = au_h_dptr_i(parent, bcpup); ++ if (!h_parent) { ++ err = cpup_dirs(dentry, bcpup, NULL); ++ if (unlikely(err)) ++ goto out; ++ h_parent = au_h_dptr_i(parent, bcpup); ++ } ++ ++ h_dir = h_parent->d_inode; ++ h_dentry = au_h_dptr_i(dentry, bstart); ++ h_inode = h_dentry->d_inode; ++ dir = parent->d_inode; ++ hdir_lock(h_dir, dir, bcpup); ++ hi_lock_child(h_inode); ++ DEBUG_ON(au_h_dptr_i(dentry, bcpup)); ++ err = sio_cpup_simple(dentry, bcpup, -1, ++ au_flags_cpup(CPUP_DTIME, parent)); ++ TraceErr(err); ++ i_unlock(h_inode); ++ hdir_unlock(h_dir, dir, bcpup); ++ ++ out: ++ di_write_unlock(parent); ++ TraceErr(err); ++ return err; ++} ++ ++int au_do_open(struct inode *inode, struct file *file, ++ int (*open)(struct file *file, int flags)) ++{ ++ int err, coo; ++ struct dentry *dentry; ++ struct super_block *sb; ++ aufs_bindex_t bstart; ++ struct inode *h_dir, *dir; ++ ++ dentry = file->f_dentry; ++ LKTRTrace("i%lu, %.*s\n", inode->i_ino, DLNPair(dentry)); ++ ++ sb = dentry->d_sb; ++ si_read_lock(sb); ++ coo = 0; ++#if 0 ++ switch (au_flag_test_coo(sb)) { ++ case AuFlag_COO_LEAF: ++ coo = !S_ISDIR(inode->i_mode); ++ break; ++ case AuFlag_COO_ALL: ++ coo = 1; ++ break; ++ } ++#endif ++ err = au_init_finfo(file); ++ //if (LktrCond) {fi_write_unlock(file); fin_finfo(file); err = -1;} ++ if (unlikely(err)) ++ goto out; ++ ++ if (!coo) { ++ di_read_lock_child(dentry, AUFS_I_RLOCK); ++ bstart = dbstart(dentry); ++ } else { ++ di_write_lock_child(dentry); ++ bstart = dbstart(dentry); ++ if (test_ro(sb, bstart, dentry->d_inode)) { ++ err = do_coo(dentry, bstart); ++ if (err) { ++ di_write_unlock(dentry); ++ goto out_finfo; ++ } ++ bstart = dbstart(dentry); ++ } ++ di_downgrade_lock(dentry, AUFS_I_RLOCK); ++ } ++ ++ // todo: remove this extra locks ++ dir = dentry->d_parent->d_inode; ++ if (!IS_ROOT(dentry)) ++ ii_read_lock_parent(dir); ++ h_dir = au_h_iptr_i(dir, bstart); ++ hdir_lock(h_dir, dir, bstart); ++ err = open(file, file->f_flags); ++ //if (LktrCond) err = -1; ++ hdir_unlock(h_dir, dir, bstart); ++ if (!IS_ROOT(dentry)) ++ ii_read_unlock(dir); ++ di_read_unlock(dentry, AUFS_I_RLOCK); ++ ++ 
out_finfo: ++ fi_write_unlock(file); ++ if (unlikely(err)) ++ au_fin_finfo(file); ++ //DbgFile(file); ++ out: ++ si_read_unlock(sb); ++ TraceErr(err); ++ return err; ++} ++ ++int au_reopen_nondir(struct file *file) ++{ ++ int err; ++ struct dentry *dentry; ++ aufs_bindex_t bstart, bindex, bend; ++ struct file *hidden_file, *h_file_tmp; ++ ++ dentry = file->f_dentry; ++ LKTRTrace("%.*s\n", DLNPair(dentry)); ++ DEBUG_ON(S_ISDIR(dentry->d_inode->i_mode) ++ || !au_h_dptr(dentry)->d_inode); ++ bstart = dbstart(dentry); ++ ++ h_file_tmp = NULL; ++ if (fbstart(file) == bstart) { ++ hidden_file = au_h_fptr(file); ++ if (file->f_mode == hidden_file->f_mode) ++ return 0; /* success */ ++ h_file_tmp = hidden_file; ++ get_file(h_file_tmp); ++ set_h_fptr(file, bstart, NULL); ++ } ++ DEBUG_ON(fbstart(file) < bstart ++ || ftofi(file)->fi_hfile[0 + bstart].hf_file); ++ ++ hidden_file = hidden_open(dentry, bstart, file->f_flags & ~O_TRUNC); ++ //if (LktrCond) {fput(hidden_file); br_put(stobr(dentry->d_sb, bstart)); ++ //hidden_file = ERR_PTR(-1);} ++ err = PTR_ERR(hidden_file); ++ if (IS_ERR(hidden_file)) ++ goto out; // close all? ++ err = 0; ++ //cpup_file_flags(hidden_file, file); ++ set_fbstart(file, bstart); ++ set_h_fptr(file, bstart, hidden_file); ++ memcpy(&hidden_file->f_ra, &file->f_ra, sizeof(file->f_ra)); //?? ++ ++ /* close lower files */ ++ bend = fbend(file); ++ for (bindex = bstart + 1; bindex <= bend; bindex++) ++ set_h_fptr(file, bindex, NULL); ++ set_fbend(file, bstart); ++ ++ out: ++ if (h_file_tmp) ++ fput(h_file_tmp); ++ TraceErr(err); ++ return err; ++} ++ ++/* ++ * copyup the deleted file for writing. ++ */ ++static int cpup_wh_file(struct file *file, aufs_bindex_t bdst, loff_t len) ++{ ++ int err; ++ struct dentry *dentry, *parent, *hidden_parent, *tmp_dentry; ++ struct dentry *hidden_dentry_bstart, *hidden_dentry_bdst; ++ struct inode *hidden_dir; ++ aufs_bindex_t bstart; ++ struct aufs_dinfo *dinfo; ++ struct dtime dt; ++ struct lkup_args lkup; ++ struct super_block *sb; ++ ++ dentry = file->f_dentry; ++ LKTRTrace("%.*s, bdst %d, len %Lu\n", DLNPair(dentry), bdst, len); ++ DEBUG_ON(S_ISDIR(dentry->d_inode->i_mode) ++ || !(file->f_mode & FMODE_WRITE)); ++ DiMustWriteLock(dentry); ++ parent = dentry->d_parent; ++ IiMustAnyLock(parent->d_inode); ++ hidden_parent = au_h_dptr_i(parent, bdst); ++ DEBUG_ON(!hidden_parent); ++ hidden_dir = hidden_parent->d_inode; ++ DEBUG_ON(!hidden_dir); ++ IMustLock(hidden_dir); ++ ++ sb = parent->d_sb; ++ lkup.nfsmnt = au_nfsmnt(sb, bdst); ++ lkup.dlgt = need_dlgt(sb); ++ tmp_dentry = lkup_whtmp(hidden_parent, &dentry->d_name, &lkup); ++ //if (LktrCond) {dput(tmp_dentry); tmp_dentry = ERR_PTR(-1);} ++ err = PTR_ERR(tmp_dentry); ++ if (IS_ERR(tmp_dentry)) ++ goto out; ++ ++ dtime_store(&dt, parent, hidden_parent); ++ dinfo = dtodi(dentry); ++ bstart = dinfo->di_bstart; ++ hidden_dentry_bdst = dinfo->di_hdentry[0 + bdst].hd_dentry; ++ hidden_dentry_bstart = dinfo->di_hdentry[0 + bstart].hd_dentry; ++ dinfo->di_bstart = bdst; ++ dinfo->di_hdentry[0 + bdst].hd_dentry = tmp_dentry; ++ dinfo->di_hdentry[0 + bstart].hd_dentry = au_h_fptr(file)->f_dentry; ++ err = cpup_single(dentry, bdst, bstart, len, ++ au_flags_cpup(!CPUP_DTIME, parent)); ++ //if (LktrCond) err = -1; ++ if (!err) ++ err = au_reopen_nondir(file); ++ //err = -1; ++ dinfo->di_hdentry[0 + bstart].hd_dentry = hidden_dentry_bstart; ++ dinfo->di_hdentry[0 + bdst].hd_dentry = hidden_dentry_bdst; ++ dinfo->di_bstart = bstart; ++ if (unlikely(err)) ++ goto out_tmp; ++ ++ 
DEBUG_ON(!d_unhashed(dentry)); ++ err = vfsub_unlink(hidden_dir, tmp_dentry, lkup.dlgt); ++ //if (LktrCond) err = -1; ++ if (unlikely(err)) { ++ IOErr("failed remove copied-up tmp file %.*s(%d)\n", ++ DLNPair(tmp_dentry), err); ++ err = -EIO; ++ } ++ dtime_revert(&dt, !CPUP_LOCKED_GHDIR); ++ ++ out_tmp: ++ dput(tmp_dentry); ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++struct cpup_wh_file_args { ++ int *errp; ++ struct file *file; ++ aufs_bindex_t bdst; ++ loff_t len; ++}; ++ ++static void call_cpup_wh_file(void *args) ++{ ++ struct cpup_wh_file_args *a = args; ++ *a->errp = cpup_wh_file(a->file, a->bdst, a->len); ++} ++ ++/* ++ * prepare the @file for writing. ++ */ ++int au_ready_to_write(struct file *file, loff_t len) ++{ ++ int err; ++ struct dentry *dentry, *parent, *hidden_dentry, *hidden_parent; ++ struct inode *hidden_inode, *hidden_dir, *inode, *dir; ++ struct super_block *sb; ++ aufs_bindex_t bstart, bcpup; ++ ++ dentry = file->f_dentry; ++ LKTRTrace("%.*s, len %Ld\n", DLNPair(dentry), len); ++ FiMustWriteLock(file); ++ ++ sb = dentry->d_sb; ++ bstart = fbstart(file); ++ DEBUG_ON(ftobr(file, bstart) != stobr(sb, bstart)); ++ ++ inode = dentry->d_inode; ++ ii_read_lock_child(inode); ++ LKTRTrace("rdonly %d, bstart %d\n", test_ro(sb, bstart, inode), bstart); ++ err = test_ro(sb, bstart, inode); ++ ii_read_unlock(inode); ++ if (!err && (au_h_fptr(file)->f_mode & FMODE_WRITE)) ++ return 0; ++ ++ /* need to cpup */ ++ parent = dentry->d_parent; // dget_parent() ++ di_write_lock_child(dentry); ++ di_write_lock_parent(parent); ++ bcpup = err = find_rw_parent_br(dentry, bstart); ++ //bcpup = err = find_rw_br(sb, bstart); ++ if (unlikely(err < 0)) ++ goto out_unlock; ++ err = 0; ++ ++ hidden_parent = au_h_dptr_i(parent, bcpup); ++ if (!hidden_parent) { ++ err = cpup_dirs(dentry, bcpup, NULL); ++ //if (LktrCond) err = -1; ++ if (unlikely(err)) ++ goto out_unlock; ++ hidden_parent = au_h_dptr_i(parent, bcpup); ++ } ++ ++ hidden_dir = hidden_parent->d_inode; ++ hidden_dentry = au_h_fptr(file)->f_dentry; ++ hidden_inode = hidden_dentry->d_inode; ++ dir = parent->d_inode; ++ hdir_lock(hidden_dir, dir, bcpup); ++ hi_lock_child(hidden_inode); ++ if (d_unhashed(dentry) || d_unhashed(hidden_dentry) ++ /* || !hidden_inode->i_nlink */) { ++ if (!au_test_perm(hidden_dir, MAY_EXEC | MAY_WRITE, ++ need_dlgt(sb))) ++ err = cpup_wh_file(file, bcpup, len); ++ else { ++ struct cpup_wh_file_args args = { ++ .errp = &err, ++ .file = file, ++ .bdst = bcpup, ++ .len = len ++ }; ++ au_wkq_wait(call_cpup_wh_file, &args, /*dlgt*/0); ++ } ++ //if (LktrCond) err = -1; ++ TraceErr(err); ++ } else { ++ if (!au_h_dptr_i(dentry, bcpup)) ++ err = sio_cpup_simple(dentry, bcpup, len, ++ au_flags_cpup(CPUP_DTIME, ++ parent)); ++ //if (LktrCond) err = -1; ++ TraceErr(err); ++ if (!err) ++ err = au_reopen_nondir(file); ++ //if (LktrCond) err = -1; ++ TraceErr(err); ++ } ++ i_unlock(hidden_inode); ++ hdir_unlock(hidden_dir, dir, bcpup); ++ ++ out_unlock: ++ di_write_unlock(parent); ++ di_write_unlock(dentry); ++// out: ++ TraceErr(err); ++ return err; ++ ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * after branch manipulating, refresh the file. 
++ */ ++static int refresh_file(struct file *file, int (*reopen)(struct file *file)) ++{ ++ int err, new_sz; ++ struct dentry *dentry; ++ aufs_bindex_t bend, bindex, bstart, brid; ++ struct aufs_hfile *p; ++ struct aufs_finfo *finfo; ++ struct super_block *sb; ++ struct inode *inode; ++ struct file *hidden_file; ++ ++ dentry = file->f_dentry; ++ LKTRTrace("%.*s\n", DLNPair(dentry)); ++ FiMustWriteLock(file); ++ DiMustReadLock(dentry); ++ inode = dentry->d_inode; ++ IiMustReadLock(inode); ++ //au_debug_on(); ++ //DbgDentry(dentry); ++ //DbgFile(file); ++ //au_debug_off(); ++ ++ err = -ENOMEM; ++ sb = dentry->d_sb; ++ finfo = ftofi(file); ++ bstart = finfo->fi_bstart; ++ bend = finfo->fi_bstart; ++ new_sz = sizeof(*finfo->fi_hfile) * (sbend(sb) + 1); ++ p = au_kzrealloc(finfo->fi_hfile, sizeof(*p) * (finfo->fi_bend + 1), ++ new_sz, GFP_KERNEL); ++ //p = NULL; ++ if (unlikely(!p)) ++ goto out; ++ finfo->fi_hfile = p; ++ hidden_file = p[0 + bstart].hf_file; ++ ++ p = finfo->fi_hfile + finfo->fi_bstart; ++ brid = p->hf_br->br_id; ++ bend = finfo->fi_bend; ++ for (bindex = finfo->fi_bstart; bindex <= bend; bindex++, p++) { ++ struct aufs_hfile tmp, *q; ++ aufs_bindex_t new_bindex; ++ ++ if (!p->hf_file) ++ continue; ++ new_bindex = find_bindex(sb, p->hf_br); ++ if (new_bindex == bindex) ++ continue; ++ if (new_bindex < 0) { // test here ++ set_h_fptr(file, bindex, NULL); ++ continue; ++ } ++ ++ /* swap two hidden inode, and loop again */ ++ q = finfo->fi_hfile + new_bindex; ++ tmp = *q; ++ *q = *p; ++ *p = tmp; ++ if (tmp.hf_file) { ++ bindex--; ++ p--; ++ } ++ } ++ { ++ aufs_bindex_t s = finfo->fi_bstart, e = finfo->fi_bend; ++ finfo->fi_bstart = 0; ++ finfo->fi_bend = sbend(sb); ++ //au_debug_on(); ++ //DbgFile(file); ++ //au_debug_off(); ++ finfo->fi_bstart = s; ++ finfo->fi_bend = e; ++ } ++ ++ p = finfo->fi_hfile; ++ if (!au_is_mmapped(file) && !d_unhashed(dentry)) { ++ bend = sbend(sb); ++ for (finfo->fi_bstart = 0; finfo->fi_bstart <= bend; ++ finfo->fi_bstart++, p++) ++ if (p->hf_file) { ++ if (p->hf_file->f_dentry ++ && p->hf_file->f_dentry->d_inode) ++ break; ++ else ++ au_hfput(p); ++ } ++ } else { ++ bend = find_brindex(sb, brid); ++ //LKTRTrace("%d\n", bend); ++ for (finfo->fi_bstart = 0; finfo->fi_bstart < bend; ++ finfo->fi_bstart++, p++) ++ if (p->hf_file) ++ au_hfput(p); ++ //LKTRTrace("%d\n", finfo->fi_bstart); ++ bend = sbend(sb); ++ } ++ ++ p = finfo->fi_hfile + bend; ++ for (finfo->fi_bend = bend; finfo->fi_bend >= finfo->fi_bstart; ++ finfo->fi_bend--, p--) ++ if (p->hf_file) { ++ if (p->hf_file->f_dentry ++ && p->hf_file->f_dentry->d_inode) ++ break; ++ else ++ au_hfput(p); ++ } ++ //Dbg("%d, %d\n", finfo->fi_bstart, finfo->fi_bend); ++ DEBUG_ON(finfo->fi_bend < finfo->fi_bstart); ++ //DbgFile(file); ++ //DbgDentry(file->f_dentry); ++ ++ err = 0; ++#if 0 // todo: ++ if (!au_h_dptr(dentry)->d_inode) { ++ au_update_figen(file); ++ goto out; /* success */ ++ } ++#endif ++ ++ if (unlikely(au_is_mmapped(file) || d_unhashed(dentry))) ++ goto out_update; /* success */ ++ ++ again: ++ bstart = ibstart(inode); ++ if (bstart < finfo->fi_bstart ++ && au_flag_test(sb, AuFlag_PLINK) ++ && au_is_plinked(sb, inode)) { ++ struct dentry *parent = dentry->d_parent; // dget_parent() ++ struct inode *dir = parent->d_inode, *h_dir; ++ ++ if (test_ro(sb, bstart, inode)) { ++ di_read_lock_parent(parent, !AUFS_I_RLOCK); ++ bstart = err = find_rw_parent_br(dentry, bstart); ++ //bstart = err = find_rw_br(sb, bstart); ++ di_read_unlock(parent, !AUFS_I_RLOCK); ++ //todo: err = -1; ++ if (unlikely(err 
< 0)) ++ goto out; ++ } ++ di_read_unlock(dentry, AUFS_I_RLOCK); ++ di_write_lock_child(dentry); ++ if (bstart != ibstart(inode)) { // todo ++ /* someone changed our inode while we were sleeping */ ++ di_downgrade_lock(dentry, AUFS_I_RLOCK); ++ goto again; ++ } ++ ++ di_read_lock_parent(parent, AUFS_I_RLOCK); ++ err = test_and_cpup_dirs(dentry, bstart, NULL); ++ ++ // always superio. ++#if 1 ++ h_dir = au_h_dptr_i(parent, bstart)->d_inode; ++ hdir_lock(h_dir, dir, bstart); ++ err = sio_cpup_simple(dentry, bstart, -1, ++ au_flags_cpup(CPUP_DTIME, parent)); ++ hdir_unlock(h_dir, dir, bstart); ++ di_read_unlock(parent, AUFS_I_RLOCK); ++#else ++ if (!is_au_wkq(current)) { ++ struct cpup_pseudo_link_args args = { ++ .errp = &err, ++ .dentry = dentry, ++ .bdst = bstart, ++ .do_lock = 1 ++ }; ++ au_wkq_wait(call_cpup_pseudo_link, &args); ++ } else ++ err = cpup_pseudo_link(dentry, bstart, /*do_lock*/1); ++#endif ++ di_downgrade_lock(dentry, AUFS_I_RLOCK); ++ if (unlikely(err)) ++ goto out; ++ } ++ ++ err = reopen(file); ++ //err = -1; ++ out_update: ++ if (!err) { ++ au_update_figen(file); ++ //DbgFile(file); ++ return 0; /* success */ ++ } ++ ++ /* error, close all hidden files */ ++ bend = fbend(file); ++ for (bindex = fbstart(file); bindex <= bend; bindex++) ++ set_h_fptr(file, bindex, NULL); ++ ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++/* common function to regular file and dir */ ++int au_reval_and_lock_finfo(struct file *file, int (*reopen)(struct file *file), ++ int wlock, int locked) ++{ ++ int err, sgen, fgen, pseudo_link; ++ struct dentry *dentry; ++ struct super_block *sb; ++ aufs_bindex_t bstart; ++ ++ dentry = file->f_dentry; ++ LKTRTrace("%.*s, w %d, l %d\n", DLNPair(dentry), wlock, locked); ++ sb = dentry->d_sb; ++ SiMustAnyLock(sb); ++ ++ err = 0; ++ sgen = au_sigen(sb); ++ fi_write_lock(file); ++ fgen = au_figen(file); ++ di_read_lock_child(dentry, AUFS_I_RLOCK); ++ bstart = dbstart(dentry); ++ pseudo_link = (bstart != ibstart(dentry->d_inode)); ++ di_read_unlock(dentry, AUFS_I_RLOCK); ++ if (sgen == fgen && !pseudo_link && fbstart(file) == bstart) { ++ if (!wlock) ++ fi_downgrade_lock(file); ++ return 0; /* success */ ++ } ++ ++ LKTRTrace("sgen %d, fgen %d\n", sgen, fgen); ++ if (sgen != au_digen(dentry)) { ++ /* ++ * d_path() and path_lookup() is a simple and good approach ++ * to revalidate. but si_rwsem in DEBUG_RWSEM will cause a ++ * deadlock. removed the code. ++ */ ++ di_write_lock_child(dentry); ++ err = au_reval_dpath(dentry, sgen); ++ //if (LktrCond) err = -1; ++ di_write_unlock(dentry); ++ if (unlikely(err < 0)) ++ goto out; ++ DEBUG_ON(au_digen(dentry) != sgen); ++ } ++ ++ di_read_lock_child(dentry, AUFS_I_RLOCK); ++ err = refresh_file(file, reopen); ++ //if (LktrCond) err = -1; ++ di_read_unlock(dentry, AUFS_I_RLOCK); ++ if (!err) { ++ if (!wlock) ++ fi_downgrade_lock(file); ++ } else ++ fi_write_unlock(file); ++ ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++// cf. aufs_nopage() ++// for madvise(2) ++static int aufs_readpage(struct file *file, struct page *page) ++{ ++ TraceEnter(); ++ unlock_page(page); ++ return 0; ++} ++ ++// they will never be called. 
++#ifdef CONFIG_AUFS_DEBUG ++static int aufs_prepare_write(struct file *file, struct page *page, ++ unsigned from, unsigned to) ++{BUG();return 0;} ++static int aufs_commit_write(struct file *file, struct page *page, ++ unsigned from, unsigned to) ++{BUG();return 0;} ++static int aufs_writepage(struct page *page, struct writeback_control *wbc) ++{BUG();return 0;} ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,17) ++static void aufs_sync_page(struct page *page) ++{BUG();} ++#else ++static int aufs_sync_page(struct page *page) ++{BUG(); return 0;} ++#endif ++ ++#if 0 // comment ++static int aufs_writepages(struct address_space *mapping, ++ struct writeback_control *wbc) ++{BUG();return 0;} ++static int aufs_readpages(struct file *filp, struct address_space *mapping, ++ struct list_head *pages, unsigned nr_pages) ++{BUG();return 0;} ++static sector_t aufs_bmap(struct address_space *mapping, sector_t block) ++{BUG();return 0;} ++#endif ++ ++static int aufs_set_page_dirty(struct page *page) ++{BUG();return 0;} ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,17) ++static void aufs_invalidatepage (struct page *page, unsigned long offset) ++{BUG();} ++#else ++static int aufs_invalidatepage (struct page *page, unsigned long offset) ++{BUG(); return 0;} ++#endif ++static int aufs_releasepage (struct page *page, gfp_t gfp) ++{BUG();return 0;} ++static ssize_t aufs_direct_IO(int rw, struct kiocb *iocb, ++ const struct iovec *iov, loff_t offset, ++ unsigned long nr_segs) ++{BUG();return 0;} ++static struct page* aufs_get_xip_page(struct address_space *mapping, ++ sector_t offset, int create) ++{BUG();return NULL;} ++//static int aufs_migratepage (struct page *newpage, struct page *page) ++//{BUG();return 0;} ++#endif ++ ++#if 0 // comment ++struct address_space { ++ struct inode *host; /* owner: inode, block_device */ ++ struct radix_tree_root page_tree; /* radix tree of all pages */ ++ rwlock_t tree_lock; /* and rwlock protecting it */ ++ unsigned int i_mmap_writable;/* count VM_SHARED mappings */ ++ struct prio_tree_root i_mmap; /* tree of private and shared mappings */ ++ struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ ++ spinlock_t i_mmap_lock; /* protect tree, count, list */ ++ unsigned int truncate_count; /* Cover race condition with truncate */ ++ unsigned long nrpages; /* number of total pages */ ++ pgoff_t writeback_index;/* writeback starts here */ ++ struct address_space_operations *a_ops; /* methods */ ++ unsigned long flags; /* error bits/gfp mask */ ++ struct backing_dev_info *backing_dev_info; /* device readahead, etc */ ++ spinlock_t private_lock; /* for use by the address_space */ ++ struct list_head private_list; /* ditto */ ++ struct address_space *assoc_mapping; /* ditto */ ++} __attribute__((aligned(sizeof(long)))); ++ ++struct address_space_operations { ++ int (*writepage)(struct page *page, struct writeback_control *wbc); ++ int (*readpage)(struct file *, struct page *); ++ void (*sync_page)(struct page *); ++ ++ /* Write back some dirty pages from this mapping. */ ++ int (*writepages)(struct address_space *, struct writeback_control *); ++ ++ /* Set a page dirty. 
Return true if this dirtied it */ ++ int (*set_page_dirty)(struct page *page); ++ ++ int (*readpages)(struct file *filp, struct address_space *mapping, ++ struct list_head *pages, unsigned nr_pages); ++ ++ /* ++ * ext3 requires that a successful prepare_write() call be followed ++ * by a commit_write() call - they must be balanced ++ */ ++ int (*prepare_write)(struct file *, struct page *, unsigned, unsigned); ++ int (*commit_write)(struct file *, struct page *, unsigned, unsigned); ++ /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ ++ sector_t (*bmap)(struct address_space *, sector_t); ++ void (*invalidatepage) (struct page *, unsigned long); ++ int (*releasepage) (struct page *, gfp_t); ++ ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, ++ loff_t offset, unsigned long nr_segs); ++ struct page* (*get_xip_page)(struct address_space *, sector_t, ++ int); ++ /* migrate the contents of a page to the specified target */ ++ int (*migratepage) (struct page *, struct page *); ++}; ++#endif ++ ++struct address_space_operations aufs_aop = { ++ .readpage = aufs_readpage, ++#ifdef CONFIG_AUFS_DEBUG ++ .writepage = aufs_writepage, ++ .sync_page = aufs_sync_page, ++ //.writepages = aufs_writepages, ++ .set_page_dirty = aufs_set_page_dirty, ++ //.readpages = aufs_readpages, ++ .prepare_write = aufs_prepare_write, ++ .commit_write = aufs_commit_write, ++ //.bmap = aufs_bmap, ++ .invalidatepage = aufs_invalidatepage, ++ .releasepage = aufs_releasepage, ++ .direct_IO = aufs_direct_IO, ++ .get_xip_page = aufs_get_xip_page, ++ //.migratepage = aufs_migratepage ++#endif ++}; +diff --git a/fs/aufs/file.h b/fs/aufs/file.h +new file mode 100755 +index 0000000..f0fa448 +--- /dev/null ++++ b/fs/aufs/file.h +@@ -0,0 +1,140 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: file.h,v 1.25 2007/05/14 03:41:52 sfjro Exp $ */ ++ ++#ifndef __AUFS_FILE_H__ ++#define __AUFS_FILE_H__ ++ ++#ifdef __KERNEL__ ++ ++#include <linux/file.h> ++#include <linux/fs.h> ++#include <linux/version.h> ++#include <linux/aufs_type.h> ++#include "misc.h" ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,18) ++// SEEK_xxx are defined in linux/fs.h ++#else ++enum {SEEK_SET, SEEK_CUR, SEEK_END}; ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct aufs_branch; ++struct aufs_hfile { ++ struct file *hf_file; ++ struct aufs_branch *hf_br; ++}; ++ ++struct aufs_vdir; ++struct aufs_finfo { ++ atomic_t fi_generation; ++ ++ struct aufs_rwsem fi_rwsem; ++ struct aufs_hfile *fi_hfile; ++ aufs_bindex_t fi_bstart, fi_bend; ++ ++ union { ++ struct vm_operations_struct *fi_h_vm_ops; ++ struct aufs_vdir *fi_vdir_cache; ++ }; ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* file.c */ ++extern struct address_space_operations aufs_aop; ++unsigned int au_file_roflags(unsigned int flags); ++struct file *hidden_open(struct dentry *dentry, aufs_bindex_t bindex, ++ int flags); ++int au_do_open(struct inode *inode, struct file *file, ++ int (*open)(struct file *file, int flags)); ++int au_reopen_nondir(struct file *file); ++int au_ready_to_write(struct file *file, loff_t len); ++int au_reval_and_lock_finfo(struct file *file, int (*reopen)(struct file *file), ++ int wlock, int locked); ++ ++/* f_op.c */ ++extern struct file_operations aufs_file_fop; ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,18) ++int aufs_flush(struct file *file, fl_owner_t id); ++#else ++int aufs_flush(struct file *file); ++#endif ++ ++/* finfo.c */ ++struct aufs_finfo *ftofi(struct file *file); ++aufs_bindex_t fbstart(struct file *file); ++aufs_bindex_t fbend(struct file *file); ++struct aufs_vdir *fvdir_cache(struct file *file); ++struct aufs_branch *ftobr(struct file *file, aufs_bindex_t bindex); ++struct file *au_h_fptr_i(struct file *file, aufs_bindex_t bindex); ++struct file *au_h_fptr(struct file *file); ++ ++void set_fbstart(struct file *file, aufs_bindex_t bindex); ++void set_fbend(struct file *file, aufs_bindex_t bindex); ++void set_fvdir_cache(struct file *file, struct aufs_vdir *vdir_cache); ++void au_hfput(struct aufs_hfile *hf); ++void set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *h_file); ++void au_update_figen(struct file *file); ++ ++void au_fin_finfo(struct file *file); ++int au_init_finfo(struct file *file); ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline int au_figen(struct file *f) ++{ ++ return atomic_read(&ftofi(f)->fi_generation); ++} ++ ++static inline int au_is_mmapped(struct file *f) ++{ ++ return !!(ftofi(f)->fi_h_vm_ops); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * fi_read_lock, fi_write_lock, ++ * fi_read_unlock, fi_write_unlock, fi_downgrade_lock ++ */ ++SimpleRwsemFuncs(fi, struct file *f, ftofi(f)->fi_rwsem); ++ ++/* to debug easier, do not make them inlined functions */ ++#define FiMustReadLock(f) do {\ ++ SiMustAnyLock((f)->f_dentry->d_sb); \ ++ RwMustReadLock(&ftofi(f)->fi_rwsem); \ ++} while (0) ++ ++#define FiMustWriteLock(f) do { \ ++ 
SiMustAnyLock((f)->f_dentry->d_sb); \ ++ RwMustWriteLock(&ftofi(f)->fi_rwsem); \ ++} while (0) ++ ++#define FiMustAnyLock(f) do { \ ++ SiMustAnyLock((f)->f_dentry->d_sb); \ ++ RwMustAnyLock(&ftofi(f)->fi_rwsem); \ ++} while (0) ++ ++#define FiMustNoWaiters(f) RwMustNoWaiters(&ftofi(f)->fi_rwsem) ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_FILE_H__ */ +diff --git a/fs/aufs/finfo.c b/fs/aufs/finfo.c +new file mode 100755 +index 0000000..1e09da8 +--- /dev/null ++++ b/fs/aufs/finfo.c +@@ -0,0 +1,211 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: finfo.c,v 1.23 2007/04/30 05:45:21 sfjro Exp $ */ ++ ++#include "aufs.h" ++ ++struct aufs_finfo *ftofi(struct file *file) ++{ ++ struct aufs_finfo *finfo = file->private_data; ++ DEBUG_ON(!finfo ++ || !finfo->fi_hfile ++ || (0 < finfo->fi_bend ++ && (/* stosi(file->f_dentry->d_sb)->si_bend ++ < finfo->fi_bend ++ || */ finfo->fi_bend < finfo->fi_bstart))); ++ return finfo; ++} ++ ++// hard/soft set ++aufs_bindex_t fbstart(struct file *file) ++{ ++ FiMustAnyLock(file); ++ return ftofi(file)->fi_bstart; ++} ++ ++aufs_bindex_t fbend(struct file *file) ++{ ++ FiMustAnyLock(file); ++ return ftofi(file)->fi_bend; ++} ++ ++struct aufs_vdir *fvdir_cache(struct file *file) ++{ ++ FiMustAnyLock(file); ++ return ftofi(file)->fi_vdir_cache; ++} ++ ++struct aufs_branch *ftobr(struct file *file, aufs_bindex_t bindex) ++{ ++ struct aufs_finfo *finfo = ftofi(file); ++ struct aufs_hfile *hf; ++ ++ FiMustAnyLock(file); ++ DEBUG_ON(!finfo ++ || finfo->fi_bstart < 0 ++ || bindex < finfo->fi_bstart ++ || finfo->fi_bend < bindex); ++ hf = finfo->fi_hfile + bindex; ++ DEBUG_ON(hf->hf_br && br_count(hf->hf_br) <= 0); ++ return hf->hf_br; ++} ++ ++struct file *au_h_fptr_i(struct file *file, aufs_bindex_t bindex) ++{ ++ struct aufs_finfo *finfo = ftofi(file); ++ struct aufs_hfile *hf; ++ ++ FiMustAnyLock(file); ++ DEBUG_ON(!finfo ++ || finfo->fi_bstart < 0 ++ || bindex < finfo->fi_bstart ++ || finfo->fi_bend < bindex); ++ hf = finfo->fi_hfile + bindex; ++ DEBUG_ON(hf->hf_file ++ && file_count(hf->hf_file) <= 0 ++ && br_count(hf->hf_br) <= 0); ++ return hf->hf_file; ++} ++ ++struct file *au_h_fptr(struct file *file) ++{ ++ return au_h_fptr_i(file, fbstart(file)); ++} ++ ++void set_fbstart(struct file *file, aufs_bindex_t bindex) ++{ ++ FiMustWriteLock(file); ++ DEBUG_ON(sbend(file->f_dentry->d_sb) < bindex); ++ ftofi(file)->fi_bstart = bindex; ++} ++ ++void set_fbend(struct file *file, aufs_bindex_t bindex) ++{ ++ FiMustWriteLock(file); ++ DEBUG_ON(sbend(file->f_dentry->d_sb) < bindex ++ || bindex < fbstart(file)); ++ ftofi(file)->fi_bend = bindex; ++} ++ ++void set_fvdir_cache(struct file *file, struct aufs_vdir *vdir_cache) ++{ ++ FiMustWriteLock(file); ++ DEBUG_ON(!S_ISDIR(file->f_dentry->d_inode->i_mode) ++ 
|| (ftofi(file)->fi_vdir_cache && vdir_cache)); ++ ftofi(file)->fi_vdir_cache = vdir_cache; ++} ++ ++void au_hfput(struct aufs_hfile *hf) ++{ ++ fput(hf->hf_file); ++ hf->hf_file = NULL; ++ DEBUG_ON(!hf->hf_br); ++ br_put(hf->hf_br); ++ hf->hf_br = NULL; ++} ++ ++void set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val) ++{ ++ struct aufs_finfo *finfo = ftofi(file); ++ struct aufs_hfile *hf; ++ ++ FiMustWriteLock(file); ++ DEBUG_ON(!finfo ++ || finfo->fi_bstart < 0 ++ || bindex < finfo->fi_bstart ++ || finfo->fi_bend < bindex); ++ DEBUG_ON(val && file_count(val) <= 0); ++ hf = finfo->fi_hfile + bindex; ++ DEBUG_ON(val && hf->hf_file); ++ if (hf->hf_file) ++ au_hfput(hf); ++ if (val) { ++ hf->hf_file = val; ++ hf->hf_br = stobr(file->f_dentry->d_sb, bindex); ++ } ++} ++ ++void au_update_figen(struct file *file) ++{ ++ atomic_set(&ftofi(file)->fi_generation, au_digen(file->f_dentry)); ++} ++ ++void au_fin_finfo(struct file *file) ++{ ++ struct aufs_finfo *finfo; ++ struct dentry *dentry; ++ aufs_bindex_t bindex, bend; ++ ++ dentry = file->f_dentry; ++ LKTRTrace("%.*s\n", DLNPair(dentry)); ++ SiMustAnyLock(dentry->d_sb); ++ ++ fi_write_lock(file); ++ bend = fbend(file); ++ bindex = fbstart(file); ++ if (bindex >= 0) ++ for (; bindex <= bend; bindex++) ++ set_h_fptr(file, bindex, NULL); ++ ++ finfo = ftofi(file); ++#ifdef CONFIG_AUFS_DEBUG ++ if (finfo->fi_bstart >= 0) { ++ bend = fbend(file); ++ for (bindex = finfo->fi_bstart; bindex <= bend; bindex++) { ++ struct aufs_hfile *hf; ++ hf = finfo->fi_hfile + bindex; ++ DEBUG_ON(hf->hf_file || hf->hf_br); ++ } ++ } ++#endif ++ ++ kfree(finfo->fi_hfile); ++ fi_write_unlock(file); ++ cache_free_finfo(finfo); ++ //file->private_data = NULL; ++} ++ ++int au_init_finfo(struct file *file) ++{ ++ struct aufs_finfo *finfo; ++ struct dentry *dentry; ++ ++ dentry = file->f_dentry; ++ LKTRTrace("%.*s\n", DLNPair(dentry)); ++ DEBUG_ON(!dentry->d_inode); ++ ++ finfo = cache_alloc_finfo(); ++ if (finfo) { ++ finfo->fi_hfile = kcalloc(sbend(dentry->d_sb) + 1, ++ sizeof(*finfo->fi_hfile), GFP_KERNEL); ++ if (finfo->fi_hfile) { ++ rw_init_wlock(&finfo->fi_rwsem); ++ finfo->fi_bstart = -1; ++ finfo->fi_bend = -1; ++ atomic_set(&finfo->fi_generation, au_digen(dentry)); ++ ++ file->private_data = finfo; ++ return 0; /* success */ ++ } ++ cache_free_finfo(finfo); ++ } ++ ++ TraceErr(-ENOMEM); ++ return -ENOMEM; ++} +diff --git a/fs/aufs/hinotify.c b/fs/aufs/hinotify.c +new file mode 100755 +index 0000000..3bad3f7 +--- /dev/null ++++ b/fs/aufs/hinotify.c +@@ -0,0 +1,536 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: hinotify.c,v 1.19 2007/05/14 03:39:21 sfjro Exp $ */ ++ ++#include "aufs.h" ++ ++static struct inotify_handle *in_handle; ++static const __u32 in_mask = (IN_MOVE | IN_DELETE | IN_CREATE /* | IN_ACCESS */ ++ | IN_MODIFY | IN_ATTRIB ++ | IN_DELETE_SELF | IN_MOVE_SELF); ++ ++int alloc_hinotify(struct aufs_hinode *hinode, struct inode *inode, ++ struct inode *hidden_inode) ++{ ++ int err; ++ struct aufs_hinotify *hin; ++ s32 wd; ++ ++ LKTRTrace("i%lu, hi%lu\n", inode->i_ino, hidden_inode->i_ino); ++ ++ err = -ENOMEM; ++ hin = cache_alloc_hinotify(); ++ if (hin) { ++ DEBUG_ON(hinode->hi_notify); ++ hinode->hi_notify = hin; ++ hin->hin_aufs_inode = inode; ++ inotify_init_watch(&hin->hin_watch); ++ wd = inotify_add_watch(in_handle, &hin->hin_watch, hidden_inode, ++ in_mask); ++ if (wd >= 0) ++ return 0; /* success */ ++ ++ err = wd; ++ put_inotify_watch(&hin->hin_watch); ++ cache_free_hinotify(hin); ++ hinode->hi_notify = NULL; ++ } ++ ++ TraceErr(err); ++ return err; ++} ++ ++void do_free_hinotify(struct aufs_hinode *hinode) ++{ ++ int err; ++ struct aufs_hinotify *hin; ++ ++ TraceEnter(); ++ ++ hin = hinode->hi_notify; ++ if (hin) { ++ err = 0; ++ if (atomic_read(&hin->hin_watch.count)) ++ err = inotify_rm_watch(in_handle, &hin->hin_watch); ++ ++ if (!err) { ++ cache_free_hinotify(hin); ++ hinode->hi_notify = NULL; ++ } else ++ IOErr1("failed inotify_rm_watch() %d\n", err); ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static void ctl_hinotify(struct aufs_hinode *hinode, const __u32 mask) ++{ ++ struct inode *hi; ++ struct inotify_watch *watch; ++ ++ hi = hinode->hi_inode; ++ LKTRTrace("hi%lu, sb %p, 0x%x\n", hi->i_ino, hi->i_sb, mask); ++ if (0 && !strcmp(current->comm, "link")) ++ dump_stack(); ++ IMustLock(hi); ++ if (!hinode->hi_notify) ++ return; ++ ++ watch = &hinode->hi_notify->hin_watch; ++#if 0 ++ { ++ u32 wd; ++ wd = inotify_find_update_watch(in_handle, hi, mask); ++ TraceErr(wd); ++ // ignore an err; ++ } ++#else ++ watch->mask = mask; ++ smp_mb(); ++#endif ++ LKTRTrace("watch %p, mask %u\n", watch, watch->mask); ++} ++ ++#define suspend_hinotify(hi) ctl_hinotify(hi, 0) ++#define resume_hinotify(hi) ctl_hinotify(hi, in_mask) ++ ++void do_hdir_lock(struct inode *h_dir, struct inode *dir, aufs_bindex_t bindex, ++ unsigned int lsc) ++{ ++ struct aufs_hinode *hinode; ++ ++ LKTRTrace("i%lu, b%d, lsc %d\n", dir->i_ino, bindex, lsc); ++ DEBUG_ON(!S_ISDIR(dir->i_mode)); ++ hinode = itoii(dir)->ii_hinode + bindex; ++ DEBUG_ON(h_dir != hinode->hi_inode); ++ ++ hi_lock(h_dir, lsc); ++ if (1 /* unlikely(au_flag_test(dir->i_sb, AuFlag_UDBA_HINOTIFY) */) ++ suspend_hinotify(hinode); ++} ++ ++void hdir_unlock(struct inode *h_dir, struct inode *dir, aufs_bindex_t bindex) ++{ ++ struct aufs_hinode *hinode; ++ ++ LKTRTrace("i%lu, b%d\n", dir->i_ino, bindex); ++ DEBUG_ON(!S_ISDIR(dir->i_mode)); ++ hinode = itoii(dir)->ii_hinode + bindex; ++ DEBUG_ON(h_dir != hinode->hi_inode); ++ ++ if (1 /* unlikely(au_flag_test(dir->i_sb, AuFlag_UDBA_HINOTIFY) */) ++ resume_hinotify(hinode); ++ i_unlock(h_dir); ++} ++ ++void hdir_lock_rename(struct dentry **h_parents, struct inode **dirs, ++ aufs_bindex_t bindex, int issamedir) ++{ ++ struct aufs_hinode *hinode; ++ ++ LKTRTrace("%.*s, %.*s\n", DLNPair(h_parents[0]), 
DLNPair(h_parents[1])); ++ ++ vfsub_lock_rename(h_parents[0], h_parents[1]); ++ hinode = itoii(dirs[0])->ii_hinode + bindex; ++ DEBUG_ON(h_parents[0]->d_inode != hinode->hi_inode); ++ suspend_hinotify(hinode); ++ if (issamedir) ++ return; ++ hinode = itoii(dirs[1])->ii_hinode + bindex; ++ DEBUG_ON(h_parents[1]->d_inode != hinode->hi_inode); ++ suspend_hinotify(hinode); ++} ++ ++void hdir_unlock_rename(struct dentry **h_parents, struct inode **dirs, ++ aufs_bindex_t bindex, int issamedir) ++{ ++ struct aufs_hinode *hinode; ++ ++ LKTRTrace("%.*s, %.*s\n", DLNPair(h_parents[0]), DLNPair(h_parents[1])); ++ ++ hinode = itoii(dirs[0])->ii_hinode + bindex; ++ DEBUG_ON(h_parents[0]->d_inode != hinode->hi_inode); ++ resume_hinotify(hinode); ++ if (!issamedir) { ++ hinode = itoii(dirs[1])->ii_hinode + bindex; ++ DEBUG_ON(h_parents[1]->d_inode != hinode->hi_inode); ++ resume_hinotify(hinode); ++ } ++ vfsub_unlock_rename(h_parents[0], h_parents[1]); ++} ++ ++void au_reset_hinotify(struct inode *inode, unsigned int flags) ++{ ++ aufs_bindex_t bindex, bend; ++ struct inode *hi; ++ ++ LKTRTrace("i%lu, 0x%x\n", inode->i_ino, flags); ++ ++ bend = ibend(inode); ++ for (bindex = ibstart(inode); bindex <= bend; bindex++) { ++ hi = au_h_iptr_i(inode, bindex); ++ if (hi) { ++ //hi_lock(hi, AUFS_LSC_H_CHILD); ++ igrab(hi); ++ set_h_iptr(inode, bindex, NULL, 0); ++ set_h_iptr(inode, bindex, igrab(hi), flags); ++ iput(hi); ++ //i_unlock(hi); ++ } ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_AUFS_DEBUG ++static char *in_name(u32 mask) ++{ ++#define test_ret(flag) if (mask & flag) return #flag; ++ test_ret(IN_ACCESS); ++ test_ret(IN_MODIFY); ++ test_ret(IN_ATTRIB); ++ test_ret(IN_CLOSE_WRITE); ++ test_ret(IN_CLOSE_NOWRITE); ++ test_ret(IN_OPEN); ++ test_ret(IN_MOVED_FROM); ++ test_ret(IN_MOVED_TO); ++ test_ret(IN_CREATE); ++ test_ret(IN_DELETE); ++ test_ret(IN_DELETE_SELF); ++ test_ret(IN_MOVE_SELF); ++ test_ret(IN_UNMOUNT); ++ test_ret(IN_Q_OVERFLOW); ++ test_ret(IN_IGNORED); ++ return ""; ++#undef test_ret ++} ++#else ++#define in_name(m) "??" 
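++/* non-debug builds fall back to a placeholder instead of decoding the mask */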
++#endif ++ ++static int dec_gen_by_name(struct inode *dir, const char *_name, u32 mask) ++{ ++ int err; ++ struct dentry *parent, *child; ++ struct inode *inode; ++ struct qstr *dname; ++ char *name = (void*)_name; ++ unsigned int len; ++ ++ LKTRTrace("i%lu, %s, 0x%x %s\n", ++ dir->i_ino, name, mask, in_name(mask)); ++ ++ err = -1; ++ parent = d_find_alias(dir); ++ if (unlikely(!parent)) ++ goto out; ++ ++#if 0 ++ if (unlikely(!memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))) ++ name += AUFS_WH_PFX_LEN; ++#endif ++ len = strlen(name); ++ spin_lock(&dcache_lock); ++ list_for_each_entry(child, &parent->d_subdirs, d_u.d_child) { ++ dname = &child->d_name; ++ if (len == dname->len && !memcmp(dname->name, name, len)) { ++ au_digen_dec(child); ++#if 1 ++ //todo: why both are needed ++ if (mask & IN_MOVE) { ++ spin_lock(&child->d_lock); ++ __d_drop(child); ++ spin_unlock(&child->d_lock); ++ } ++#endif ++ ++ inode = child->d_inode; ++ if (inode) ++ au_iigen_dec(inode); ++ err = !!inode; ++ ++ // todo: the i_nlink of newly created name by link(2) ++ // should be updated ++ // todo: some nfs dentry doesn't notified at deleteing ++ break; ++ } ++ } ++ spin_unlock(&dcache_lock); ++ dput(parent); ++ ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++struct postproc_args { ++ struct inode *h_dir, *dir, *h_child_inode; ++ char *h_child_name; ++ u32 mask; ++}; ++ ++static void dec_gen_by_ino(struct postproc_args *a) ++{ ++ struct super_block *sb; ++ aufs_bindex_t bindex, bend, bfound; ++ struct xino xino; ++ struct inode *cinode; ++ ++ TraceEnter(); ++ ++ sb = a->dir->i_sb; ++ DEBUG_ON(!au_flag_test(sb, AuFlag_XINO)); ++ ++ bfound = -1; ++ bend = ibend(a->dir); ++ for (bindex = ibstart(a->dir); bfound == -1 && bindex <= bend; bindex++) ++ if (au_h_iptr_i(a->dir, bindex) == a->h_dir) ++ bfound = bindex; ++ if (bfound < 0) ++ return; ++ ++ bindex = find_brindex(sb, itoii(a->dir)->ii_hinode[bfound + 0].hi_id); ++ if (bindex < 0) ++ return; ++ if (unlikely(xino_read(sb, bindex, a->h_child_inode->i_ino, &xino))) ++ return; ++ cinode = NULL; ++ if (xino.ino) ++ cinode = ilookup(sb, xino.ino); ++ if (cinode) { ++#if 1 ++ if (1 || a->mask & IN_MOVE) { ++ struct dentry *child; ++ spin_lock(&dcache_lock); ++ list_for_each_entry(child, &cinode->i_dentry, d_alias) ++ au_digen_dec(child); ++ spin_unlock(&dcache_lock); ++ } ++#endif ++ au_iigen_dec(cinode); ++ iput(cinode); ++ } ++} ++ ++static void reset_ino(struct postproc_args *a) ++{ ++ aufs_bindex_t bindex, bend; ++ struct super_block *sb; ++ struct inode *h_dir; ++ ++ sb = a->dir->i_sb; ++ bend = ibend(a->dir); ++ for (bindex = ibstart(a->dir); bindex <= bend; bindex++) { ++ h_dir = au_h_iptr_i(a->dir, bindex); ++ if (h_dir && h_dir != a->h_dir) ++ xino_write0(sb, bindex, h_dir->i_ino); ++ /* ignore this error */ ++ } ++} ++ ++static void postproc(void *args) ++{ ++ struct postproc_args *a = args; ++ struct super_block *sb; ++ struct aufs_vdir *vdir; ++ ++ //au_debug_on(); ++ LKTRTrace("mask 0x%x %s, i%lu, hi%lu, hci%lu\n", ++ a->mask, in_name(a->mask), a->dir->i_ino, a->h_dir->i_ino, ++ a->h_child_inode ? a->h_child_inode->i_ino : 0); ++ DEBUG_ON(!a->dir); ++#if 0//def ForceInotify ++ Dbg("mask 0x%x %s, i%lu, hi%lu, hci%lu\n", ++ a->mask, in_name(a->mask), a->dir->i_ino, a->h_dir->i_ino, ++ a->h_child_inode ? 
a->h_child_inode->i_ino : 0); ++#endif ++ ++ i_lock(a->dir); ++ sb = a->dir->i_sb; ++ si_read_lock(sb); // consider write_lock ++ ii_write_lock_parent(a->dir); ++ ++ /* make dir entries obsolete */ ++ vdir = ivdir(a->dir); ++ if (vdir) ++ vdir->vd_jiffy = 0; ++ a->dir->i_version++; ++ ++ /* ++ * special handling root directory, ++ * sine d_revalidate may not be called later. ++ * main purpose is maintaining i_nlink. ++ */ ++ if (unlikely(a->dir->i_ino == AUFS_ROOT_INO)) ++ au_cpup_attr_all(a->dir); ++ ++ if (a->h_child_inode && au_flag_test(sb, AuFlag_XINO)) ++ dec_gen_by_ino(a); ++ else if (a->mask & (IN_MOVE_SELF | IN_DELETE_SELF)) ++ reset_ino(a); ++ ++ ii_write_unlock(a->dir); ++ si_read_unlock(sb); ++ i_unlock(a->dir); ++ ++ au_mntput(a->dir->i_sb); ++ iput(a->h_child_inode); ++ iput(a->h_dir); ++ iput(a->dir); ++#if 0 ++ if (atomic_dec_and_test(&stosi(sb)->si_hinotify)) ++ wake_up_all(&stosi(sb)->si_hinotify_wq); ++#endif ++ kfree(a); ++ //au_debug_off(); ++} ++ ++static void aufs_inotify(struct inotify_watch *watch, u32 wd, u32 mask, ++ u32 cookie, const char *h_child_name, ++ struct inode *h_child_inode) ++{ ++ struct aufs_hinotify *hinotify; ++ struct postproc_args *args; ++ int len; ++ char *p; ++ struct inode *dir; ++ //static DECLARE_WAIT_QUEUE_HEAD(wq); ++ ++ //au_debug_on(); ++ LKTRTrace("i%lu, wd %d, mask 0x%x %s, cookie 0x%x, hcname %s, hi%lu\n", ++ watch->inode->i_ino, wd, mask, in_name(mask), cookie, ++ h_child_name ? h_child_name : "", ++ h_child_inode ? h_child_inode->i_ino : 0); ++ //au_debug_off(); ++ //IMustLock(h_dir); ++#if 0 //defined(ForceInotify) || defined(DbgInotify) ++ Dbg("i%lu, wd %d, mask 0x%x %s, cookie 0x%x, hcname %s, hi%lu\n", ++ watch->inode->i_ino, wd, mask, in_name(mask), cookie, ++ h_child_name ? h_child_name : "", ++ h_child_inode ? h_child_inode->i_ino : 0); ++#endif ++ /* if IN_UNMOUNT happens, there must be another bug */ ++ if (mask & (IN_IGNORED | IN_UNMOUNT)) { ++ put_inotify_watch(watch); ++ return; ++ } ++ ++ switch (mask & IN_ALL_EVENTS) { ++ case IN_MODIFY: ++ case IN_ATTRIB: ++ if (h_child_name) ++ return; ++ break; ++ ++ case IN_MOVED_FROM: ++ case IN_MOVED_TO: ++ case IN_CREATE: ++ DEBUG_ON(!h_child_name || !h_child_inode); ++ break; ++ case IN_DELETE: ++ /* ++ * aufs never be able to get this child inode. ++ * revalidation should be in d_revalide() ++ * by checking i_nlink, i_generation or d_unhashed(). 
++ */ ++ DEBUG_ON(!h_child_name); ++ break; ++ ++ case IN_DELETE_SELF: ++ case IN_MOVE_SELF: ++ DEBUG_ON(h_child_name || h_child_inode); ++ break; ++ ++ case IN_ACCESS: ++ default: ++ DEBUG_ON(1); ++ } ++ ++#ifdef DbgInotify ++ WARN_ON(1); ++#endif ++ ++ /* iput() will be called in postproc() */ ++ hinotify = container_of(watch, struct aufs_hinotify, hin_watch); ++ DEBUG_ON(!hinotify || !hinotify->hin_aufs_inode); ++ dir = hinotify->hin_aufs_inode; ++ ++ /* force re-lookup in next d_revalidate() */ ++ if (dir->i_ino != AUFS_ROOT_INO) ++ au_iigen_dec(dir); ++ len = 0; ++ if (h_child_name && dec_gen_by_name(dir, h_child_name, mask)) ++ len = strlen(h_child_name); ++ ++ //wait_event(wq, (args = kmalloc(sizeof(*args), GFP_KERNEL))); ++ args = kmalloc(sizeof(*args) + len + 1, GFP_KERNEL); ++ if (unlikely(!args)) { ++ Err("no memory\n"); ++ return; ++ } ++ args->mask = mask; ++ args->dir = igrab(dir); ++ args->h_dir = igrab(watch->inode); ++ args->h_child_inode = NULL; ++ if (len) { ++ if (h_child_inode) ++ args->h_child_inode = igrab(h_child_inode); ++ p = (void*)args; ++ args->h_child_name = p + sizeof(*args); ++ memcpy(args->h_child_name, h_child_name, len + 1); ++ } ++ //atomic_inc(&stosi(args->dir->i_sb)->si_hinotify); ++ /* prohibit umount */ ++ au_mntget(args->dir->i_sb); ++ au_wkq_nowait(postproc, args, /*dlgt*/0); ++} ++ ++#if 0 ++void hinotify_flush(struct super_block *sb) ++{ ++ atomic_t *p = &stosi(sb)->si_hinotify; ++ wait_event(stosi(sb)->si_hinotify_wq, !atomic_read(p)); ++} ++#endif ++ ++static void aufs_inotify_destroy(struct inotify_watch *watch) ++{ ++ return; ++} ++ ++static struct inotify_operations aufs_inotify_ops = { ++ .handle_event = aufs_inotify, ++ .destroy_watch = aufs_inotify_destroy ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++int __init au_inotify_init(void) ++{ ++ in_handle = inotify_init(&aufs_inotify_ops); ++ if (!IS_ERR(in_handle)) ++ return 0; ++ TraceErrPtr(in_handle); ++ return PTR_ERR(in_handle); ++} ++ ++void au_inotify_fin(void) ++{ ++ inotify_destroy(in_handle); ++} +diff --git a/fs/aufs/i_op.c b/fs/aufs/i_op.c +new file mode 100755 +index 0000000..1cd0453 +--- /dev/null ++++ b/fs/aufs/i_op.c +@@ -0,0 +1,641 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: i_op.c,v 1.30 2007/04/23 00:55:05 sfjro Exp $ */ ++ ++//#include <linux/fs.h> ++//#include <linux/namei.h> ++#include <linux/security.h> ++#include <asm/uaccess.h> ++#include "aufs.h" ++ ++#ifdef CONFIG_AUFS_DLGT ++struct security_inode_permission_args { ++ int *errp; ++ struct inode *h_inode; ++ int mask; ++ struct nameidata *fake_nd; ++}; ++ ++static void call_security_inode_permission(void *args) ++{ ++ struct security_inode_permission_args *a = args; ++ LKTRTrace("fsuid %d\n", current->fsuid); ++ *a->errp = security_inode_permission(a->h_inode, a->mask, a->fake_nd); ++} ++#endif ++ ++static int hidden_permission(struct inode *hidden_inode, int mask, ++ struct nameidata *fake_nd, int brperm, int dlgt) ++{ ++ int err, submask; ++ const int write_mask = (mask & (MAY_WRITE | MAY_APPEND)); ++ ++ LKTRTrace("ino %lu, mask 0x%x, brperm 0x%x\n", ++ hidden_inode->i_ino, mask, brperm); ++ ++ err = -EACCES; ++ if (unlikely(write_mask && IS_IMMUTABLE(hidden_inode))) ++ goto out; ++ ++ /* skip hidden fs test in the case of write to ro branch */ ++ submask = mask & ~MAY_APPEND; ++ if (unlikely((write_mask && !br_writable(brperm)) ++ || !hidden_inode->i_op ++ || !hidden_inode->i_op->permission)) { ++ //LKTRLabel(generic_permission); ++ err = generic_permission(hidden_inode, submask, NULL); ++ } else { ++ //LKTRLabel(h_inode->permission); ++ err = hidden_inode->i_op->permission(hidden_inode, submask, ++ fake_nd); ++ TraceErr(err); ++ } ++ ++#if 1 ++ if (!err) { ++#ifndef CONFIG_AUFS_DLGT ++ err = security_inode_permission(hidden_inode, mask, fake_nd); ++#else ++ if (!dlgt) ++ err = security_inode_permission(hidden_inode, mask, ++ fake_nd); ++ else { ++ struct security_inode_permission_args args = { ++ .errp = &err, ++ .h_inode = hidden_inode, ++ .mask = mask, ++ .fake_nd = fake_nd ++ }; ++ au_wkq_wait(call_security_inode_permission, &args, ++ /*dlgt*/1); ++ } ++#endif ++ } ++#endif ++ ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++static int silly_lock(struct inode *inode, struct nameidata *nd) ++{ ++ int locked = 0; ++ struct super_block *sb = inode->i_sb; ++ ++ LKTRTrace("i%lu, nd %p\n", inode->i_ino, nd); ++ ++#ifdef CONFIG_AUFS_FAKE_DM ++ si_read_lock(sb); ++ ii_read_lock_child(inode); ++#else ++ if (!nd || !nd->dentry) { ++ si_read_lock(sb); ++ ii_read_lock_child(inode); ++ } else if (nd->dentry->d_inode != inode) { ++ locked = 1; ++ /* lock child first, then parent */ ++ si_read_lock(sb); ++ ii_read_lock_child(inode); ++ di_read_lock_parent(nd->dentry, 0); ++ } else { ++ locked = 2; ++ aufs_read_lock(nd->dentry, AUFS_I_RLOCK); ++ } ++#endif ++ return locked; ++} ++ ++static void silly_unlock(int locked, struct inode *inode, struct nameidata *nd) ++{ ++ struct super_block *sb = inode->i_sb; ++ ++ LKTRTrace("locked %d, i%lu, nd %p\n", locked, inode->i_ino, nd); ++ ++#ifdef CONFIG_AUFS_FAKE_DM ++ ii_read_unlock(inode); ++ si_read_unlock(sb); ++#else ++ switch (locked) { ++ case 0: ++ ii_read_unlock(inode); ++ si_read_unlock(sb); ++ break; ++ case 1: ++ di_read_unlock(nd->dentry, 0); ++ ii_read_unlock(inode); ++ si_read_unlock(sb); ++ break; ++ case 2: ++ aufs_read_unlock(nd->dentry, AUFS_I_RLOCK); ++ break; ++ default: ++ BUG(); ++ } ++#endif ++} ++ ++static int aufs_permission(struct inode *inode, int mask, struct nameidata *nd) ++{ ++ int err, locked, dlgt; ++ 
aufs_bindex_t bindex, bend; ++ struct inode *hidden_inode; ++ struct super_block *sb; ++ struct nameidata fake_nd, *p; ++ const int write_mask = (mask & (MAY_WRITE | MAY_APPEND)); ++ const int nondir = !S_ISDIR(inode->i_mode); ++ ++ LKTRTrace("ino %lu, mask 0x%x, nondir %d, write_mask %d, " ++ "nd %p{%p, %p}\n", ++ inode->i_ino, mask, nondir, write_mask, ++ nd, nd ? nd->dentry : NULL, nd ? nd->mnt : NULL); ++ ++ sb = inode->i_sb; ++ locked = silly_lock(inode, nd); ++ dlgt = need_dlgt(sb); ++ ++ if (nd) ++ fake_nd = *nd; ++ if (/* unlikely */(nondir || write_mask)) { ++ hidden_inode = au_h_iptr(inode); ++ DEBUG_ON(!hidden_inode ++ || ((hidden_inode->i_mode & S_IFMT) ++ != (inode->i_mode & S_IFMT))); ++ err = 0; ++ bindex = ibstart(inode); ++ p = fake_dm(&fake_nd, nd, sb, bindex); ++ /* actual test will be delegated to LSM */ ++ if (IS_ERR(p)) ++ DEBUG_ON(PTR_ERR(p) != -ENOENT); ++ else { ++ err = hidden_permission(hidden_inode, mask, p, ++ sbr_perm(sb, bindex), dlgt); ++ fake_dm_release(p); ++ } ++ if (write_mask && !err) { ++ err = find_rw_br(sb, bindex); ++ if (err >= 0) ++ err = 0; ++ } ++ goto out; ++ } ++ ++ /* non-write to dir */ ++ err = 0; ++ bend = ibend(inode); ++ for (bindex = ibstart(inode); !err && bindex <= bend; bindex++) { ++ hidden_inode = au_h_iptr_i(inode, bindex); ++ if (!hidden_inode) ++ continue; ++ DEBUG_ON(!S_ISDIR(hidden_inode->i_mode)); ++ ++ p = fake_dm(&fake_nd, nd, sb, bindex); ++ /* actual test will be delegated to LSM */ ++ if (IS_ERR(p)) ++ DEBUG_ON(PTR_ERR(p) != -ENOENT); ++ else { ++ err = hidden_permission(hidden_inode, mask, p, ++ sbr_perm(sb, bindex), dlgt); ++ fake_dm_release(p); ++ } ++ } ++ ++ out: ++ silly_unlock(locked, inode, nd); ++ TraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ struct dentry *ret, *parent; ++ int err, npositive; ++ struct inode *inode; ++ ++ LKTRTrace("dir %lu, %.*s\n", dir->i_ino, DLNPair(dentry)); ++ DEBUG_ON(IS_ROOT(dentry)); ++ IMustLock(dir); ++ ++ parent = dentry->d_parent; // dget_parent() ++ aufs_read_lock(parent, !AUFS_I_RLOCK); ++ err = au_alloc_dinfo(dentry); ++ //if (LktrCond) err = -1; ++ ret = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out; ++ ++ err = npositive = lkup_dentry(dentry, dbstart(parent), /*type*/0); ++ //err = -1; ++ ret = ERR_PTR(err); ++ if (unlikely(err < 0)) ++ goto out_unlock; ++ inode = NULL; ++ if (npositive) { ++ inode = au_new_inode(dentry); ++ ret = (void*)inode; ++ } ++ if (!IS_ERR(inode)) { ++#if 1 ++ /* d_splice_alias() also supports d_add() */ ++ ret = d_splice_alias(inode, dentry); ++ if (unlikely(IS_ERR(ret) && inode)) ++ ii_write_unlock(inode); ++#else ++ d_add(dentry, inode); ++#endif ++ } ++ ++ out_unlock: ++ di_write_unlock(dentry); ++ out: ++ aufs_read_unlock(parent, !AUFS_I_RLOCK); ++ TraceErrPtr(ret); ++ return ret; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * decide the branch and the parent dir where we will create a new entry. ++ * returns new bindex or an error. ++ * copyup the parent dir if needed. 
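++ * branch selection order: the caller-forced branch when force_btgt is
++ * non-negative, otherwise the lower of the dentry's and src_dentry's top
++ * branches, falling back to the first writable parent branch when that
++ * branch is read-only.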
++ */ ++int wr_dir(struct dentry *dentry, int add_entry, struct dentry *src_dentry, ++ aufs_bindex_t force_btgt, int do_lock_srcdir) ++{ ++ int err; ++ aufs_bindex_t bcpup, bstart, src_bstart; ++ struct dentry *hidden_parent; ++ struct super_block *sb; ++ struct dentry *parent, *src_parent = NULL; ++ struct inode *dir, *src_dir = NULL; ++ ++ LKTRTrace("%.*s, add %d, src %p, force %d, lock_srcdir %d\n", ++ DLNPair(dentry), add_entry, src_dentry, force_btgt, ++ do_lock_srcdir); ++ ++ sb = dentry->d_sb; ++ parent = dentry->d_parent; // dget_parent() ++ bcpup = bstart = dbstart(dentry); ++ if (force_btgt < 0) { ++ if (src_dentry) { ++ src_bstart = dbstart(src_dentry); ++ if (src_bstart < bstart) ++ bcpup = src_bstart; ++ } ++ if (test_ro(sb, bcpup, dentry->d_inode)) { ++ if (!add_entry) ++ di_read_lock_parent(parent, !AUFS_I_RLOCK); ++ bcpup = err = find_rw_parent_br(dentry, bcpup); ++ //bcpup = err = find_rw_br(sb, bcpup); ++ if (!add_entry) ++ di_read_unlock(parent, !AUFS_I_RLOCK); ++ //err = -1; ++ if (unlikely(err < 0)) ++ goto out; ++ } ++ } else { ++ DEBUG_ON(bstart <= force_btgt ++ || test_ro(sb, force_btgt, dentry->d_inode)); ++ bcpup = force_btgt; ++ } ++ LKTRTrace("bstart %d, bcpup %d\n", bstart, bcpup); ++ ++ err = bcpup; ++ if (bcpup == bstart) ++ goto out; /* success */ ++ ++ /* copyup the new parent into the branch we process */ ++ hidden_parent = au_h_dptr(dentry)->d_parent; // dget_parent() ++ if (src_dentry) { ++ src_parent = src_dentry->d_parent; // dget_parent() ++ src_dir = src_parent->d_inode; ++ if (do_lock_srcdir) ++ di_write_lock_parent2(src_parent); ++ } ++ ++ dir = parent->d_inode; ++ if (add_entry) { ++ au_update_dbstart(dentry); ++ IMustLock(dir); ++ DiMustWriteLock(parent); ++ IiMustWriteLock(dir); ++ } else ++ di_write_lock_parent(parent); ++ ++ err = 0; ++ if (!au_h_dptr_i(parent, bcpup)) ++ err = cpup_dirs(dentry, bcpup, src_parent); ++ //err = -1; ++ if (!err && add_entry) { ++ hidden_parent = au_h_dptr_i(parent, bcpup); ++ DEBUG_ON(!hidden_parent || !hidden_parent->d_inode); ++ hi_lock_parent(hidden_parent->d_inode); ++ err = lkup_neg(dentry, bcpup); ++ //err = -1; ++ i_unlock(hidden_parent->d_inode); ++ } ++ ++ if (!add_entry) ++ di_write_unlock(parent); ++ if (do_lock_srcdir) ++ di_write_unlock(src_parent); ++ if (!err) ++ err = bcpup; /* success */ ++ //err = -EPERM; ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int aufs_setattr(struct dentry *dentry, struct iattr *ia) ++{ ++ int err, isdir; ++ aufs_bindex_t bstart, bcpup; ++ struct inode *hidden_inode, *inode, *dir, *h_dir, *gh_dir, *gdir; ++ struct dentry *hidden_dentry, *parent; ++ unsigned int udba; ++ ++ LKTRTrace("%.*s, ia_valid 0x%x\n", DLNPair(dentry), ia->ia_valid); ++ inode = dentry->d_inode; ++ IMustLock(inode); ++ ++ aufs_read_lock(dentry, AUFS_D_WLOCK); ++ bstart = dbstart(dentry); ++ bcpup = err = wr_dir(dentry, /*add*/0, /*src_dentry*/NULL, ++ /*force_btgt*/-1, /*do_lock_srcdir*/0); ++ //err = -1; ++ if (unlikely(err < 0)) ++ goto out; ++ ++ /* crazy udba locks */ ++ udba = au_flag_test(dentry->d_sb, AuFlag_UDBA_INOTIFY); ++ parent = NULL; ++ gdir = gh_dir = dir = h_dir = NULL; ++ if ((udba || bstart != bcpup) && !IS_ROOT(dentry)) { ++ parent = dentry->d_parent; // dget_parent() ++ dir = parent->d_inode; ++ di_read_lock_parent(parent, AUFS_I_RLOCK); ++ h_dir = au_h_iptr_i(dir, bcpup); ++ } ++ if (parent) { ++ if (unlikely(udba && !IS_ROOT(parent))) { ++ gdir = parent->d_parent->d_inode; // 
dget_parent() ++ ii_read_lock_parent2(gdir); ++ gh_dir = au_h_iptr_i(gdir, bcpup); ++ hgdir_lock(gh_dir, gdir, bcpup); ++ } ++ hdir_lock(h_dir, dir, bcpup); ++ } ++ ++ isdir = S_ISDIR(inode->i_mode); ++ hidden_dentry = au_h_dptr(dentry); ++ hidden_inode = hidden_dentry->d_inode; ++ DEBUG_ON(!hidden_inode); ++ ++#define HiLock(bindex) do {\ ++ if (!isdir) \ ++ hi_lock_child(hidden_inode); \ ++ else \ ++ hdir2_lock(hidden_inode, inode, bindex); \ ++ } while (0) ++#define HiUnlock(bindex) do {\ ++ if (!isdir) \ ++ i_unlock(hidden_inode); \ ++ else \ ++ hdir_unlock(hidden_inode, inode, bindex); \ ++ } while (0) ++ ++ if (bstart != bcpup) { ++ loff_t size = -1; ++ ++ if ((ia->ia_valid & ATTR_SIZE) ++ && ia->ia_size < i_size_read(inode)) { ++ size = ia->ia_size; ++ ia->ia_valid &= ~ATTR_SIZE; ++ } ++ HiLock(bstart); ++ err = sio_cpup_simple(dentry, bcpup, size, ++ au_flags_cpup(CPUP_DTIME, parent)); ++ //err = -1; ++ HiUnlock(bstart); ++ if (unlikely(err || !ia->ia_valid)) ++ goto out_unlock; ++ ++ hidden_dentry = au_h_dptr(dentry); ++ hidden_inode = hidden_dentry->d_inode; ++ DEBUG_ON(!hidden_inode); ++ } ++ ++ HiLock(bcpup); ++ err = vfsub_notify_change(hidden_dentry, ia, need_dlgt(dentry->d_sb)); ++ //err = -1; ++ if (!err) ++ au_cpup_attr_changable(inode); ++ HiUnlock(bcpup); ++#undef HiLock ++#undef HiUnlock ++ ++ out_unlock: ++ if (parent) { ++ hdir_unlock(h_dir, dir, bcpup); ++ di_read_unlock(parent, AUFS_I_RLOCK); ++ } ++ if (unlikely(gdir)) { ++ hdir_unlock(gh_dir, gdir, bcpup); ++ ii_read_unlock(gdir); ++ } ++ out: ++ aufs_read_unlock(dentry, AUFS_D_WLOCK); ++ TraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int hidden_readlink(struct dentry *dentry, int bindex, ++ char __user * buf, int bufsiz) ++{ ++ struct super_block *sb; ++ struct dentry *hidden_dentry; ++ ++ hidden_dentry = au_h_dptr_i(dentry, bindex); ++ if (unlikely(!hidden_dentry->d_inode->i_op ++ || !hidden_dentry->d_inode->i_op->readlink)) ++ return -EINVAL; ++ ++ sb = dentry->d_sb; ++ if (!test_ro(sb, bindex, dentry->d_inode)) { ++ touch_atime(sbr_mnt(sb, bindex), hidden_dentry); ++ dentry->d_inode->i_atime = hidden_dentry->d_inode->i_atime; ++ } ++ return hidden_dentry->d_inode->i_op->readlink ++ (hidden_dentry, buf, bufsiz); ++} ++ ++static int aufs_readlink(struct dentry *dentry, char __user * buf, int bufsiz) ++{ ++ int err; ++ ++ LKTRTrace("%.*s, %d\n", DLNPair(dentry), bufsiz); ++ ++ aufs_read_lock(dentry, AUFS_I_RLOCK); ++ err = hidden_readlink(dentry, dbstart(dentry), buf, bufsiz); ++ //err = -1; ++ aufs_read_unlock(dentry, AUFS_I_RLOCK); ++ TraceErr(err); ++ return err; ++} ++ ++static void *aufs_follow_link(struct dentry *dentry, struct nameidata *nd) ++{ ++ int err; ++ char *buf; ++ mm_segment_t old_fs; ++ ++ LKTRTrace("%.*s, nd %.*s\n", DLNPair(dentry), DLNPair(nd->dentry)); ++ ++ err = -ENOMEM; ++ buf = __getname(); ++ //buf = NULL; ++ if (unlikely(!buf)) ++ goto out; ++ ++ aufs_read_lock(dentry, AUFS_I_RLOCK); ++ old_fs = get_fs(); ++ set_fs(KERNEL_DS); ++ err = hidden_readlink(dentry, dbstart(dentry), (char __user *)buf, ++ PATH_MAX); ++ //err = -1; ++ set_fs(old_fs); ++ aufs_read_unlock(dentry, AUFS_I_RLOCK); ++ ++ if (err >= 0) { ++ buf[err] = 0; ++ /* will be freed by put_link */ ++ nd_set_link(nd, buf); ++ return NULL; /* success */ ++ } ++ __putname(buf); ++ ++ out: ++ path_release(nd); ++ TraceErr(err); ++ return ERR_PTR(err); ++} ++ ++static void aufs_put_link(struct dentry *dentry, struct nameidata *nd, ++ void *cookie) ++{ 
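++	/* free the name buffer that aufs_follow_link() handed to nd_set_link() */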
++ LKTRTrace("%.*s\n", DLNPair(dentry)); ++ __putname(nd_get_link(nd)); ++} ++ ++/* ---------------------------------------------------------------------- */ ++#if 0 // comment ++struct inode_operations { ++ int (*create) (struct inode *,struct dentry *,int, struct nameidata *); ++ struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); ++ int (*link) (struct dentry *,struct inode *,struct dentry *); ++ int (*unlink) (struct inode *,struct dentry *); ++ int (*symlink) (struct inode *,struct dentry *,const char *); ++ int (*mkdir) (struct inode *,struct dentry *,int); ++ int (*rmdir) (struct inode *,struct dentry *); ++ int (*mknod) (struct inode *,struct dentry *,int,dev_t); ++ int (*rename) (struct inode *, struct dentry *, ++ struct inode *, struct dentry *); ++ int (*readlink) (struct dentry *, char __user *,int); ++ void * (*follow_link) (struct dentry *, struct nameidata *); ++ void (*put_link) (struct dentry *, struct nameidata *, void *); ++ void (*truncate) (struct inode *); ++ int (*permission) (struct inode *, int, struct nameidata *); ++ int (*setattr) (struct dentry *, struct iattr *); ++ int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); ++ int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); ++ ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); ++ ssize_t (*listxattr) (struct dentry *, char *, size_t); ++ int (*removexattr) (struct dentry *, const char *); ++ void (*truncate_range)(struct inode *, loff_t, loff_t); ++}; ++#endif ++ ++struct inode_operations aufs_symlink_iop = { ++ .permission = aufs_permission, ++ .setattr = aufs_setattr, ++ ++ .readlink = aufs_readlink, ++ .follow_link = aufs_follow_link, ++ .put_link = aufs_put_link ++}; ++ ++//i_op_add.c ++int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev); ++int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname); ++int aufs_create(struct inode *dir, struct dentry *dentry, int mode, ++ struct nameidata *nd); ++int aufs_link(struct dentry *src_dentry, struct inode *dir, ++ struct dentry *dentry); ++int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode); ++ ++//i_op_del.c ++int aufs_unlink(struct inode *dir, struct dentry *dentry); ++int aufs_rmdir(struct inode *dir, struct dentry *dentry); ++ ++// i_op_ren.c ++int aufs_rename(struct inode *src_dir, struct dentry *src_dentry, ++ struct inode *dir, struct dentry *dentry); ++ ++struct inode_operations aufs_dir_iop = { ++ .create = aufs_create, ++ .lookup = aufs_lookup, ++ .link = aufs_link, ++ .unlink = aufs_unlink, ++ .symlink = aufs_symlink, ++ .mkdir = aufs_mkdir, ++ .rmdir = aufs_rmdir, ++ .mknod = aufs_mknod, ++ .rename = aufs_rename, ++ ++ .permission = aufs_permission, ++ .setattr = aufs_setattr, ++ ++#if 0 // xattr ++ .setxattr = aufs_setxattr, ++ .getxattr = aufs_getxattr, ++ .listxattr = aufs_listxattr, ++ .removexattr = aufs_removexattr ++#endif ++}; ++ ++struct inode_operations aufs_iop = { ++ .permission = aufs_permission, ++ .setattr = aufs_setattr, ++ ++#if 0 // xattr ++ .setxattr = aufs_setxattr, ++ .getxattr = aufs_getxattr, ++ .listxattr = aufs_listxattr, ++ .removexattr = aufs_removexattr ++#endif ++}; +diff --git a/fs/aufs/i_op_add.c b/fs/aufs/i_op_add.c +new file mode 100755 +index 0000000..977d773 +--- /dev/null ++++ b/fs/aufs/i_op_add.c +@@ -0,0 +1,621 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under 
the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: i_op_add.c,v 1.37 2007/05/07 03:46:08 sfjro Exp $ */ ++ ++//#include <linux/fs.h> ++//#include <linux/namei.h> ++#include "aufs.h" ++ ++/* ++ * final procedure of adding a new entry, except link(2). ++ * remove whiteout, instantiate, copyup the parent dir's times and size ++ * and update version. ++ * if it failed, re-create the removed whiteout. ++ */ ++static int epilog(struct dentry *wh_dentry, struct dentry *dentry) ++{ ++ int err, rerr; ++ aufs_bindex_t bwh; ++ struct inode *inode, *dir; ++ struct dentry *wh; ++ struct lkup_args lkup; ++ ++ LKTRTrace("wh %p, %.*s\n", wh_dentry, DLNPair(dentry)); ++ ++ lkup.dlgt = need_dlgt(dentry->d_sb); ++ bwh = -1; ++ if (wh_dentry) { ++ bwh = dbwh(dentry); ++ err = au_unlink_wh_dentry(wh_dentry->d_parent->d_inode, ++ wh_dentry, dentry, lkup.dlgt); ++ //err = -1; ++ if (unlikely(err)) ++ goto out; ++ } ++ ++ inode = au_new_inode(dentry); ++ //inode = ERR_PTR(-1); ++ if (!IS_ERR(inode)) { ++ d_instantiate(dentry, inode); ++ dir = dentry->d_parent->d_inode; ++ /* or always cpup dir mtime? */ ++ if (ibstart(dir) == dbstart(dentry)) ++ au_cpup_attr_timesizes(dir); ++ dir->i_version++; ++ return 0; /* success */ ++ } ++ ++ err = PTR_ERR(inode); ++ if (!wh_dentry) ++ goto out; ++ ++ /* revert */ ++ lkup.nfsmnt = au_nfsmnt(dentry->d_sb, bwh); ++ wh = simple_create_wh(dentry, bwh, wh_dentry->d_parent, &lkup); ++ //wh = ERR_PTR(-1); ++ rerr = PTR_ERR(wh); ++ if (!IS_ERR(wh)) { ++ dput(wh); ++ goto out; ++ } ++ IOErr("%.*s reverting whiteout failed(%d, %d)\n", ++ DLNPair(dentry), err, rerr); ++ err = -EIO; ++ ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++/* ++ * initial procedure of adding a new entry. ++ * prepare writable branch and the parent dir, lock it, ++ * lookup whiteout for the new entry. 
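++ * returns the whiteout dentry to be removed later, NULL when no whiteout
++ * lookup is needed, or an ERR_PTR; on success the hidden parent dir is
++ * left locked for the caller.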
++ */ ++static struct dentry * ++lock_hdir_lkup_wh(struct dentry *dentry, struct dtime *dt, ++ struct dentry *src_dentry, int do_lock_srcdir) ++{ ++ struct dentry *wh_dentry, *parent, *hidden_parent; ++ int err; ++ aufs_bindex_t bstart, bcpup; ++ struct inode *dir, *h_dir; ++ struct lkup_args lkup; ++ ++ LKTRTrace("%.*s, src %p\n", DLNPair(dentry), src_dentry); ++ ++ parent = dentry->d_parent; ++ bstart = dbstart(dentry); ++ bcpup = err = wr_dir(dentry, 1, src_dentry, -1, do_lock_srcdir); ++ //err = -1; ++ wh_dentry = ERR_PTR(err); ++ if (unlikely(err < 0)) ++ goto out; ++ ++ dir = parent->d_inode; ++ hidden_parent = au_h_dptr_i(parent, bcpup); ++ h_dir = hidden_parent->d_inode; ++ hdir_lock(h_dir, dir, bcpup); ++ if (dt) ++ dtime_store(dt, parent, hidden_parent); ++ if (/* bcpup != bstart || */ bcpup != dbwh(dentry)) ++ return NULL; /* success */ ++ ++ lkup.nfsmnt = au_nfsmnt(parent->d_sb, bcpup); ++ lkup.dlgt = need_dlgt(parent->d_sb); ++ wh_dentry = lkup_wh(hidden_parent, &dentry->d_name, &lkup); ++ //wh_dentry = ERR_PTR(-1); ++ if (IS_ERR(wh_dentry)) ++ hdir_unlock(h_dir, dir, bcpup); ++ ++ out: ++ TraceErrPtr(wh_dentry); ++ return wh_dentry; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++enum {Mknod, Symlink, Creat}; ++struct simple_arg { ++ int type; ++ union { ++ struct { ++ int mode; ++ struct nameidata *nd; ++ } c; ++ struct { ++ const char *symname; ++ } s; ++ struct { ++ int mode; ++ dev_t dev; ++ } m; ++ } u; ++}; ++ ++static int add_simple(struct inode *dir, struct dentry *dentry, ++ struct simple_arg *arg) ++{ ++ int err, dlgt; ++ struct dentry *hidden_dentry, *hidden_parent, *wh_dentry, *parent; ++ struct inode *hidden_dir; ++ struct dtime dt; ++ ++ LKTRTrace("type %d, %.*s\n", arg->type, DLNPair(dentry)); ++ IMustLock(dir); ++ ++ aufs_read_lock(dentry, AUFS_D_WLOCK); ++ parent = dentry->d_parent; ++ di_write_lock_parent(parent); ++ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, /*src_dentry*/NULL, ++ /*do_lock_srcdir*/0); ++ //wh_dentry = ERR_PTR(-1); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out; ++ ++ hidden_dentry = au_h_dptr(dentry); ++ hidden_parent = hidden_dentry->d_parent; ++ hidden_dir = hidden_parent->d_inode; ++ IMustLock(hidden_dir); ++ dlgt = need_dlgt(dir->i_sb); ++ ++#if 1 // partial testing ++ switch (arg->type) { ++ case Creat: ++#if 0 ++ if (arg->u.c.nd) { ++ struct nameidata fake_nd; ++ fake_nd = *arg->u.c.nd; ++ fake_nd.dentry = dget(hidden_parent); ++ fake_nd.mnt = sbr_mnt(dentry->d_sb, dbstart(dentry)); ++ mntget(fake_nd.mnt); ++ err = vfsub_create(hidden_dir, hidden_dentry, ++ arg->u.c.mode, &fake_nd, dlgt); ++ path_release(&fake_nd); ++ } else ++#endif ++ err = vfsub_create(hidden_dir, hidden_dentry, ++ arg->u.c.mode, NULL, dlgt); ++ break; ++ case Symlink: ++ err = vfsub_symlink(hidden_dir, hidden_dentry, ++ arg->u.s.symname, S_IALLUGO, dlgt); ++ break; ++ case Mknod: ++ err = vfsub_mknod(hidden_dir, hidden_dentry, ++ arg->u.m.mode, arg->u.m.dev, dlgt); ++ break; ++ default: ++ BUG(); ++ } ++#else ++ err = -1; ++#endif ++ if (!err) ++ err = epilog(wh_dentry, dentry); ++ //err = -1; ++ ++ /* revert */ ++ if (unlikely(err && hidden_dentry->d_inode)) { ++ int rerr; ++ rerr = vfsub_unlink(hidden_dir, hidden_dentry, dlgt); ++ //rerr = -1; ++ if (rerr) { ++ IOErr("%.*s revert failure(%d, %d)\n", ++ DLNPair(dentry), err, rerr); ++ err = -EIO; ++ } ++ dtime_revert(&dt, !CPUP_LOCKED_GHDIR); ++ d_drop(dentry); ++ } ++ ++ hdir_unlock(hidden_dir, dir, dbstart(dentry)); ++ dput(wh_dentry); ++ ++ out: 
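++	/* on failure, refresh the branch start index and unhash the dentry */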
++ if (unlikely(err)) { ++ au_update_dbstart(dentry); ++ d_drop(dentry); ++ } ++ di_write_unlock(parent); ++ aufs_read_unlock(dentry, AUFS_D_WLOCK); ++ TraceErr(err); ++ return err; ++} ++ ++int aufs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) ++{ ++ struct simple_arg arg = { ++ .type = Mknod, ++ .u.m = {.mode = mode, .dev = dev} ++ }; ++ return add_simple(dir, dentry, &arg); ++} ++ ++int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) ++{ ++ struct simple_arg arg = { ++ .type = Symlink, ++ .u.s.symname = symname ++ }; ++ return add_simple(dir, dentry, &arg); ++} ++ ++int aufs_create(struct inode *dir, struct dentry *dentry, int mode, ++ struct nameidata *nd) ++{ ++ struct simple_arg arg = { ++ .type = Creat, ++ .u.c = {.mode = mode, .nd = nd} ++ }; ++ return add_simple(dir, dentry, &arg); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct link_arg { ++ aufs_bindex_t bdst, bsrc; ++ int issamedir, dlgt; ++ struct dentry *src_parent, *parent, *hidden_dentry; ++ struct inode *hidden_dir, *inode; ++}; ++ ++static int cpup_before_link(struct dentry *src_dentry, struct inode *dir, ++ struct link_arg *a) ++{ ++ int err; ++ unsigned int flags; ++ struct inode *hi, *hdir = NULL, *src_dir; ++ ++ TraceEnter(); ++ ++ err = 0; ++ flags = au_flags_cpup(CPUP_DTIME, a->parent); ++ src_dir = a->src_parent->d_inode; ++ if (!a->issamedir) { ++ // todo: dead lock? ++ di_read_lock_parent2(a->src_parent, AUFS_I_RLOCK); ++ // this temporary unlock/lock is safe ++ hdir_unlock(a->hidden_dir, dir, a->bdst); ++ err = test_and_cpup_dirs(src_dentry, a->bdst, a->parent); ++ //err = -1; ++ if (!err) { ++ hdir = au_h_iptr_i(src_dir, a->bdst); ++ hdir_lock(hdir, src_dir, a->bdst); ++ flags = au_flags_cpup(CPUP_DTIME, a->src_parent); ++ } ++ } ++ ++ if (!err) { ++ hi = au_h_dptr(src_dentry)->d_inode; ++ hi_lock_child(hi); ++ err = sio_cpup_simple(src_dentry, a->bdst, -1, flags); ++ //err = -1; ++ i_unlock(hi); ++ } ++ ++ if (!a->issamedir) { ++ if (hdir) ++ hdir_unlock(hdir, src_dir, a->bdst); ++ hdir_lock(a->hidden_dir, dir, a->bdst); ++ di_read_unlock(a->src_parent, AUFS_I_RLOCK); ++ } ++ ++ TraceErr(err); ++ return err; ++} ++ ++static int cpup_or_link(struct dentry *src_dentry, struct link_arg *a) ++{ ++ int err; ++ struct inode *inode, *h_inode, *h_dst_inode; ++ struct dentry *h_dentry; ++ aufs_bindex_t bstart; ++ struct super_block *sb; ++ ++ TraceEnter(); ++ ++ sb = src_dentry->d_sb; ++ inode = src_dentry->d_inode; ++ h_dentry = au_h_dptr(src_dentry); ++ h_inode = h_dentry->d_inode; ++ bstart = ibstart(inode); ++ h_dst_inode = NULL; ++ if (bstart <= a->bdst) ++ h_dst_inode = au_h_iptr_i(inode, a->bdst); ++ ++ if (!h_dst_inode) { ++ /* copyup src_dentry as the name of dentry. 
*/ ++ set_dbstart(src_dentry, a->bdst); ++ set_h_dptr(src_dentry, a->bdst, dget(a->hidden_dentry)); ++ hi_lock_child(h_inode); ++ err = sio_cpup_single(src_dentry, a->bdst, a->bsrc, -1, ++ au_flags_cpup(!CPUP_DTIME, a->parent)); ++ //err = -1; ++ i_unlock(h_inode); ++ set_h_dptr(src_dentry, a->bdst, NULL); ++ set_dbstart(src_dentry, a->bsrc); ++ } else { ++ /* the inode of src_dentry already exists on a.bdst branch */ ++ h_dentry = d_find_alias(h_dst_inode); ++ if (h_dentry) { ++ err = vfsub_link(h_dentry, a->hidden_dir, ++ a->hidden_dentry, a->dlgt); ++ dput(h_dentry); ++ } else { ++ IOErr("no dentry found for i%lu on b%d\n", ++ h_dst_inode->i_ino, a->bdst); ++ err = -EIO; ++ } ++ } ++ ++ if (!err) ++ append_plink(sb, a->inode, a->hidden_dentry, a->bdst); ++ ++ TraceErr(err); ++ return err; ++} ++ ++int aufs_link(struct dentry *src_dentry, struct inode *dir, ++ struct dentry *dentry) ++{ ++ int err, rerr; ++ struct dentry *hidden_parent, *wh_dentry, *hidden_src_dentry; ++ struct dtime dt; ++ struct link_arg a; ++ struct super_block *sb; ++ ++ LKTRTrace("src %.*s, i%lu, dst %.*s\n", ++ DLNPair(src_dentry), dir->i_ino, DLNPair(dentry)); ++ IMustLock(dir); ++ IMustLock(src_dentry->d_inode); ++ ++ aufs_read_and_write_lock2(dentry, src_dentry, /*isdir*/0); ++ a.src_parent = src_dentry->d_parent; ++ a.parent = dentry->d_parent; ++ a.issamedir = (a.src_parent == a.parent); ++ di_write_lock_parent(a.parent); ++ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, !a.issamedir); ++ //wh_dentry = ERR_PTR(-1); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out; ++ ++ a.inode = src_dentry->d_inode; ++ a.hidden_dentry = au_h_dptr(dentry); ++ hidden_parent = a.hidden_dentry->d_parent; ++ a.hidden_dir = hidden_parent->d_inode; ++ IMustLock(a.hidden_dir); ++ ++ err = 0; ++ sb = dentry->d_sb; ++ a.dlgt = need_dlgt(sb); ++ ++ //todo: minor optimize, their sb may be same while their bindex differs. ++ a.bsrc = dbstart(src_dentry); ++ a.bdst = dbstart(dentry); ++ hidden_src_dentry = au_h_dptr(src_dentry); ++ if (unlikely(!au_flag_test(sb, AuFlag_PLINK))) { ++ /* ++ * copyup src_dentry to the branch we process, ++ * and then link(2) to it. ++ * gave up 'pseudo link by cpup' approach, ++ * since nlink may be one and some applications will not work. 
++ */ ++ if (a.bdst < a.bsrc ++ /* && hidden_src_dentry->d_sb != a.hidden_dentry->d_sb */) ++ err = cpup_before_link(src_dentry, dir, &a); ++ if (!err) { ++ hidden_src_dentry = au_h_dptr(src_dentry); ++ err = vfsub_link(hidden_src_dentry, a.hidden_dir, ++ a.hidden_dentry, a.dlgt); ++ //err = -1; ++ } ++ } else { ++ if (a.bdst < a.bsrc ++ /* && hidden_src_dentry->d_sb != a.hidden_dentry->d_sb */) ++ err = cpup_or_link(src_dentry, &a); ++ else { ++ hidden_src_dentry = au_h_dptr(src_dentry); ++ err = vfsub_link(hidden_src_dentry, a.hidden_dir, ++ a.hidden_dentry, a.dlgt); ++ //err = -1; ++ } ++ } ++ if (unlikely(err)) ++ goto out_unlock; ++ if (wh_dentry) { ++ err = au_unlink_wh_dentry(a.hidden_dir, wh_dentry, dentry, ++ a.dlgt); ++ //err = -1; ++ if (unlikely(err)) ++ goto out_revert; ++ } ++ ++ dir->i_version++; ++ if (ibstart(dir) == dbstart(dentry)) ++ au_cpup_attr_timesizes(dir); ++ if (!d_unhashed(a.hidden_dentry) ++ /* || hidden_old_inode->i_nlink <= nlink */ ++ /* || SB_NFS(hidden_src_dentry->d_sb) */) { ++ dentry->d_inode = igrab(a.inode); ++ d_instantiate(dentry, a.inode); ++ a.inode->i_nlink++; ++ a.inode->i_ctime = dir->i_ctime; ++ } else ++ /* nfs case (< 2.6.15) */ ++ d_drop(dentry); ++#if 0 ++ au_debug_on(); ++ DbgInode(a.inode); ++ au_debug_off(); ++ { ++ aufs_bindex_t i; ++ for (i = ibstart(a.inode); i <= ibend(a.inode); i++) { ++ struct xino xino; ++ struct inode *hi; ++ hi = au_h_iptr_i(a.inode, i); ++ if (hi) { ++ xino_read(sb, i, hi->i_ino, &xino); ++ Dbg("hi%lu, i%lu\n", hi->i_ino, xino.ino); ++ } ++ } ++ } ++#endif ++ goto out_unlock; /* success */ ++ ++ out_revert: ++#if 0 // remove ++ if (d_unhashed(a.hidden_dentry)) { ++ /* hardlink on nfs (< 2.6.15) */ ++ struct dentry *d; ++ const struct qstr *name = &a.hidden_dentry->d_name; ++ DEBUG_ON(a.hidden_dentry->d_parent->d_inode != a.hidden_dir); ++ // do not superio. 
++ d = lkup_one(name->name, a.hidden_dentry->d_parent, name->len, ++ au_nfsmnt(sb, a.bdst)??, need_dlgt(sb)); ++ rerr = PTR_ERR(d); ++ if (IS_ERR(d)) ++ goto out_rerr; ++ dput(a.hidden_dentry); ++ a.hidden_dentry = d; ++ DEBUG_ON(!d->d_inode); ++ } ++#endif ++ rerr = vfsub_unlink(a.hidden_dir, a.hidden_dentry, a.dlgt); ++ //rerr = -1; ++ if (!rerr) ++ goto out_dt; ++// out_rerr: ++ IOErr("%.*s reverting failed(%d, %d)\n", DLNPair(dentry), err, rerr); ++ err = -EIO; ++ out_dt: ++ d_drop(dentry); ++ dtime_revert(&dt, !CPUP_LOCKED_GHDIR); ++ out_unlock: ++ hdir_unlock(a.hidden_dir, dir, a.bdst); ++ dput(wh_dentry); ++ out: ++ if (unlikely(err)) { ++ au_update_dbstart(dentry); ++ d_drop(dentry); ++ } ++ di_write_unlock(a.parent); ++ aufs_read_and_write_unlock2(dentry, src_dentry); ++ TraceErr(err); ++ return err; ++} ++ ++int aufs_mkdir(struct inode *dir, struct dentry *dentry, int mode) ++{ ++ int err, rerr, diropq, dlgt; ++ struct dentry *hidden_dentry, *hidden_parent, *wh_dentry, *parent, ++ *opq_dentry; ++ struct inode *hidden_dir, *hidden_inode; ++ struct dtime dt; ++ aufs_bindex_t bindex; ++ struct super_block *sb; ++ ++ LKTRTrace("i%lu, %.*s, mode 0%o\n", dir->i_ino, DLNPair(dentry), mode); ++ IMustLock(dir); ++ ++ aufs_read_lock(dentry, AUFS_D_WLOCK); ++ parent = dentry->d_parent; ++ di_write_lock_parent(parent); ++ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, /*src_dentry*/NULL, ++ /*do_lock_srcdir*/0); ++ //wh_dentry = ERR_PTR(-1); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out; ++ ++ sb = dentry->d_sb; ++ bindex = dbstart(dentry); ++ hidden_dentry = au_h_dptr(dentry); ++ hidden_parent = hidden_dentry->d_parent; ++ hidden_dir = hidden_parent->d_inode; ++ IMustLock(hidden_dir); ++ dlgt = need_dlgt(sb); ++ ++ err = vfsub_mkdir(hidden_dir, hidden_dentry, mode, dlgt); ++ //err = -1; ++ if (unlikely(err)) ++ goto out_unlock; ++ hidden_inode = hidden_dentry->d_inode; ++ ++ /* make the dir opaque */ ++ diropq = 0; ++ if (unlikely(wh_dentry || au_flag_test(sb, AuFlag_ALWAYS_DIROPQ))) { ++ hi_lock_child(hidden_inode); ++ opq_dentry = create_diropq(dentry, bindex, dlgt); ++ //opq_dentry = ERR_PTR(-1); ++ i_unlock(hidden_inode); ++ err = PTR_ERR(opq_dentry); ++ if (IS_ERR(opq_dentry)) ++ goto out_dir; ++ dput(opq_dentry); ++ diropq = 1; ++ } ++ ++ err = epilog(wh_dentry, dentry); ++ //err = -1; ++ if (!err) { ++ dir->i_nlink++; ++ goto out_unlock; /* success */ ++ } ++ ++ /* revert */ ++ if (unlikely(diropq)) { ++ LKTRLabel(revert opq); ++ hi_lock_child(hidden_inode); ++ rerr = remove_diropq(dentry, bindex, dlgt); ++ //rerr = -1; ++ i_unlock(hidden_inode); ++ if (rerr) { ++ IOErr("%.*s reverting diropq failed(%d, %d)\n", ++ DLNPair(dentry), err, rerr); ++ err = -EIO; ++ } ++ } ++ ++ out_dir: ++ LKTRLabel(revert dir); ++ rerr = vfsub_rmdir(hidden_dir, hidden_dentry, dlgt); ++ //rerr = -1; ++ if (rerr) { ++ IOErr("%.*s reverting dir failed(%d, %d)\n", ++ DLNPair(dentry), err, rerr); ++ err = -EIO; ++ } ++ d_drop(dentry); ++ dtime_revert(&dt, /*fake flag*/CPUP_LOCKED_GHDIR); ++ out_unlock: ++ hdir_unlock(hidden_dir, dir, bindex); ++ dput(wh_dentry); ++ out: ++ if (unlikely(err)) { ++ au_update_dbstart(dentry); ++ d_drop(dentry); ++ } ++ di_write_unlock(parent); ++ aufs_read_unlock(dentry, AUFS_D_WLOCK); ++ TraceErr(err); ++ return err; ++} +diff --git a/fs/aufs/i_op_del.c b/fs/aufs/i_op_del.c +new file mode 100755 +index 0000000..f29b204 +--- /dev/null ++++ b/fs/aufs/i_op_del.c +@@ -0,0 +1,414 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, 
aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: i_op_del.c,v 1.35 2007/05/14 03:41:52 sfjro Exp $ */ ++ ++#include "aufs.h" ++ ++/* returns, ++ * 0: wh is unnecessary ++ * plus: wh is necessary ++ * minus: error ++ */ ++int wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup, ++ struct dentry *locked) ++{ ++ int need_wh, err; ++ aufs_bindex_t bstart; ++ struct dentry *hidden_dentry; ++ struct super_block *sb; ++ ++ LKTRTrace("%.*s, isdir %d, *bcpup %d, locked %p\n", ++ DLNPair(dentry), isdir, *bcpup, locked); ++ sb = dentry->d_sb; ++ ++ bstart = dbstart(dentry); ++ LKTRTrace("bcpup %d, bstart %d\n", *bcpup, bstart); ++ hidden_dentry = au_h_dptr(dentry); ++ if (*bcpup < 0) { ++ *bcpup = bstart; ++ if (test_ro(sb, bstart, dentry->d_inode)) { ++ *bcpup = err = find_rw_parent_br(dentry, bstart); ++ //*bcpup = err = find_rw_br(sb, bstart); ++ //err = -1; ++ if (unlikely(err < 0)) ++ goto out; ++ } ++ } else { ++ /* braces are added to stop a warning */ ++ DEBUG_ON(bstart < *bcpup ++ || test_ro(sb, *bcpup, dentry->d_inode)); ++ } ++ LKTRTrace("bcpup %d, bstart %d\n", *bcpup, bstart); ++ ++ if (*bcpup != bstart) { ++ err = cpup_dirs(dentry, *bcpup, locked); ++ //err = -1; ++ if (unlikely(err)) ++ goto out; ++ need_wh = 1; ++ } else { ++ //struct nameidata nd; ++ aufs_bindex_t old_bend, new_bend, bdiropq = -1; ++ old_bend = dbend(dentry); ++ if (isdir) { ++ bdiropq = dbdiropq(dentry); ++ set_dbdiropq(dentry, -1); ++ } ++ err = need_wh = lkup_dentry(dentry, bstart + 1, /*type*/0); ++ //err = -1; ++ if (isdir) ++ set_dbdiropq(dentry, bdiropq); ++ if (unlikely(err < 0)) ++ goto out; ++ new_bend = dbend(dentry); ++ if (!need_wh && old_bend != new_bend) { ++ set_h_dptr(dentry, new_bend, NULL); ++ set_dbend(dentry, old_bend); ++#if 0 ++ } else if (!au_h_dptr_i(dentry, new_bend)->d_inode) { ++ LKTRTrace("negative\n"); ++ set_h_dptr(dentry, new_bend, NULL); ++ set_dbend(dentry, old_bend); ++ need_wh = 0; ++#endif ++ } ++ } ++ LKTRTrace("need_wh %d\n", need_wh); ++ err = need_wh; ++ ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++static struct dentry * ++lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup, ++ struct dtime *dt) ++{ ++ struct dentry *wh_dentry; ++ int err, need_wh; ++ struct dentry *hidden_parent, *parent; ++ struct inode *dir, *h_dir; ++ struct lkup_args lkup; ++ ++ LKTRTrace("%.*s, isdir %d\n", DLNPair(dentry), isdir); ++ ++ err = need_wh = wr_dir_need_wh(dentry, isdir, bcpup, NULL); ++ //err = -1; ++ wh_dentry = ERR_PTR(err); ++ if (unlikely(err < 0)) ++ goto out; ++ ++ parent = dentry->d_parent; ++ dir = parent->d_inode; ++ hidden_parent = au_h_dptr_i(parent, *bcpup); ++ h_dir = hidden_parent->d_inode; ++ hdir_lock(h_dir, dir, *bcpup); ++ dtime_store(dt, parent, hidden_parent); ++ if (!need_wh) ++ return NULL; /* success, no need to create whiteout */ ++ ++ 
lkup.nfsmnt = au_nfsmnt(dentry->d_sb, *bcpup); ++ lkup.dlgt = need_dlgt(dentry->d_sb); ++ wh_dentry = simple_create_wh(dentry, *bcpup, hidden_parent, &lkup); ++ //wh_dentry = ERR_PTR(-1); ++ if (!IS_ERR(wh_dentry)) ++ goto out; /* success */ ++ /* returns with the parent is locked and wh_dentry is DGETed */ ++ ++ hdir_unlock(h_dir, dir, *bcpup); ++ ++ out: ++ TraceErrPtr(wh_dentry); ++ return wh_dentry; ++} ++ ++static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex, ++ struct aufs_nhash *whlist, struct inode *dir) ++{ ++ int rmdir_later, err; ++ struct dentry *hidden_dentry; ++ ++ LKTRTrace("%.*s, b%d\n", DLNPair(dentry), bindex); ++ ++ err = rename_whtmp(dentry, bindex); ++ //err = -1; ++#if 0 ++ //todo: bug ++ if (unlikely(err)) { ++ au_direval_inc(dentry->d_parent); ++ return err; ++ } ++#endif ++ ++ hidden_dentry = au_h_dptr_i(dentry, bindex); ++ if (!au_is_nfs(hidden_dentry->d_sb)) { ++ const int dirwh = stosi(dentry->d_sb)->si_dirwh; ++ rmdir_later = (dirwh <= 1); ++ if (!rmdir_later) ++ rmdir_later = is_longer_wh(whlist, bindex, dirwh); ++ if (rmdir_later) ++ return rmdir_later; ++ } ++ ++ err = rmdir_whtmp(hidden_dentry, whlist, bindex, dir, dentry->d_inode); ++ //err = -1; ++ if (unlikely(err)) { ++ IOErr("rmdir %.*s, b%d failed, %d. ignored\n", ++ DLNPair(hidden_dentry), bindex, err); ++ err = 0; ++ } ++ TraceErr(err); ++ return err; ++} ++ ++static void epilog(struct inode *dir, struct dentry *dentry, ++ aufs_bindex_t bindex) ++{ ++ d_drop(dentry); ++ dentry->d_inode->i_ctime = dir->i_ctime; ++ if (atomic_read(&dentry->d_count) == 1) { ++ set_h_dptr(dentry, dbstart(dentry), NULL); ++ au_update_dbstart(dentry); ++ } ++ if (ibstart(dir) == bindex) ++ au_cpup_attr_timesizes(dir); ++ dir->i_version++; ++} ++ ++static int do_revert(int err, struct dentry *wh_dentry, struct dentry *dentry, ++ aufs_bindex_t bwh, struct dtime *dt, int dlgt) ++{ ++ int rerr; ++ ++ rerr = au_unlink_wh_dentry(wh_dentry->d_parent->d_inode, wh_dentry, ++ dentry, dlgt); ++ //rerr = -1; ++ if (!rerr) { ++ set_dbwh(dentry, bwh); ++ dtime_revert(dt, !CPUP_LOCKED_GHDIR); ++ return 0; ++ } ++ ++ IOErr("%.*s reverting whiteout failed(%d, %d)\n", ++ DLNPair(dentry), err, rerr); ++ return -EIO; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int aufs_unlink(struct inode *dir, struct dentry *dentry) ++{ ++ int err, dlgt; ++ struct inode *inode, *hidden_dir; ++ struct dentry *parent, *wh_dentry, *hidden_dentry, *hidden_parent; ++ struct dtime dt; ++ aufs_bindex_t bwh, bindex, bstart; ++ struct super_block *sb; ++ ++ LKTRTrace("i%lu, %.*s\n", dir->i_ino, DLNPair(dentry)); ++ IMustLock(dir); ++ inode = dentry->d_inode; ++ if (unlikely(!inode)) ++ return -ENOENT; // possible? 
++ IMustLock(inode); ++ ++ aufs_read_lock(dentry, AUFS_D_WLOCK); ++ parent = dentry->d_parent; ++ di_write_lock_parent(parent); ++ ++ bstart = dbstart(dentry); ++ bwh = dbwh(dentry); ++ bindex = -1; ++ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &dt); ++ //wh_dentry = ERR_PTR(-1); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out; ++ ++ sb = dir->i_sb; ++ dlgt = need_dlgt(sb); ++ hidden_dentry = au_h_dptr(dentry); ++ dget(hidden_dentry); ++ hidden_parent = hidden_dentry->d_parent; ++ hidden_dir = hidden_parent->d_inode; ++ ++ if (bindex == bstart) { ++ err = vfsub_unlink(hidden_dir, hidden_dentry, dlgt); ++ //err = -1; ++ } else { ++ DEBUG_ON(!wh_dentry); ++ hidden_parent = wh_dentry->d_parent; ++ DEBUG_ON(hidden_parent != au_h_dptr_i(parent, bindex)); ++ hidden_dir = hidden_parent->d_inode; ++ IMustLock(hidden_dir); ++ err = 0; ++ } ++ ++ if (!err) { ++ inode->i_nlink--; ++ epilog(dir, dentry, bindex); ++#if 0 ++ xino_write0(sb, bstart, hidden_dentry->d_inode->i_ino); ++ /* ignore this error */ ++#endif ++ goto out_unlock; /* success */ ++ } ++ ++ /* revert */ ++ if (wh_dentry) { ++ int rerr; ++ rerr = do_revert(err, wh_dentry, dentry, bwh, &dt, dlgt); ++ if (rerr) ++ err = rerr; ++ } ++ ++ out_unlock: ++ hdir_unlock(hidden_dir, dir, bindex); ++ dput(wh_dentry); ++ dput(hidden_dentry); ++ out: ++ di_write_unlock(parent); ++ aufs_read_unlock(dentry, AUFS_D_WLOCK); ++ TraceErr(err); ++ return err; ++} ++ ++int aufs_rmdir(struct inode *dir, struct dentry *dentry) ++{ ++ int err, rmdir_later; ++ struct inode *inode, *hidden_dir; ++ struct dentry *parent, *wh_dentry, *hidden_dentry, *hidden_parent; ++ struct dtime dt; ++ aufs_bindex_t bwh, bindex, bstart; ++ struct rmdir_whtmp_arg *arg; ++ struct aufs_nhash *whlist; ++ struct super_block *sb; ++ ++ LKTRTrace("i%lu, %.*s\n", dir->i_ino, DLNPair(dentry)); ++ IMustLock(dir); ++ inode = dentry->d_inode; ++ if (unlikely(!inode)) ++ return -ENOENT; // possible? 
++ IMustLock(inode); ++ ++ whlist = nhash_new(GFP_KERNEL); ++ err = PTR_ERR(whlist); ++ if (IS_ERR(whlist)) ++ goto out; ++ ++ err = -ENOMEM; ++ arg = kmalloc(sizeof(*arg), GFP_KERNEL); ++ //arg = NULL; ++ if (unlikely(!arg)) ++ goto out_whlist; ++ ++ aufs_read_lock(dentry, AUFS_D_WLOCK); ++ parent = dentry->d_parent; ++ di_write_lock_parent(parent); ++ err = test_empty(dentry, whlist); ++ //err = -1; ++ if (unlikely(err)) ++ goto out_arg; ++ ++ bstart = dbstart(dentry); ++ bwh = dbwh(dentry); ++ bindex = -1; ++ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/ 1, &bindex, &dt); ++ //wh_dentry = ERR_PTR(-1); ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out_arg; ++ ++ hidden_dentry = au_h_dptr(dentry); ++ dget(hidden_dentry); ++ hidden_parent = hidden_dentry->d_parent; ++ hidden_dir = hidden_parent->d_inode; ++ ++ rmdir_later = 0; ++ if (bindex == bstart) { ++ IMustLock(hidden_dir); ++ err = renwh_and_rmdir(dentry, bstart, whlist, dir); ++ //err = -1; ++ if (err > 0) { ++ rmdir_later = err; ++ err = 0; ++ } ++ } else { ++ DEBUG_ON(!wh_dentry); ++ hidden_parent = wh_dentry->d_parent; ++ DEBUG_ON(hidden_parent != au_h_dptr_i(parent, bindex)); ++ hidden_dir = hidden_parent->d_inode; ++ IMustLock(hidden_dir); ++ err = 0; ++ } ++ ++ sb = dentry->d_sb; ++ if (!err) { ++ //aufs_bindex_t bi, bend; ++ ++ au_reset_hinotify(inode, /*flags*/0); ++ inode->i_nlink = 0; ++ set_dbdiropq(dentry, -1); ++ epilog(dir, dentry, bindex); ++ ++ if (rmdir_later) { ++ kick_rmdir_whtmp(hidden_dentry, whlist, bstart, dir, ++ inode, arg); ++ arg = NULL; ++ } ++ ++#if 0 ++ bend = dbend(dentry); ++ for (bi = bstart; bi <= bend; bi++) { ++ struct dentry *hd; ++ hd = au_h_dptr_i(dentry, bi); ++ if (hd && hd->d_inode) ++ xino_write0(sb, bi, hd->d_inode->i_ino); ++ /* ignore this error */ ++ } ++#endif ++ ++ goto out_unlock; /* success */ ++ } ++ ++ /* revert */ ++ LKTRLabel(revert); ++ if (wh_dentry) { ++ int rerr; ++ rerr = do_revert(err, wh_dentry, dentry, bwh, &dt, ++ need_dlgt(sb)); ++ if (rerr) ++ err = rerr; ++ } ++ ++ out_unlock: ++ hdir_unlock(hidden_dir, dir, bindex); ++ dput(wh_dentry); ++ dput(hidden_dentry); ++ out_arg: ++ di_write_unlock(parent); ++ aufs_read_unlock(dentry, AUFS_D_WLOCK); ++ kfree(arg); ++ out_whlist: ++ nhash_del(whlist); ++ out: ++ TraceErr(err); ++ return err; ++} +diff --git a/fs/aufs/i_op_ren.c b/fs/aufs/i_op_ren.c +new file mode 100755 +index 0000000..08137f9 +--- /dev/null ++++ b/fs/aufs/i_op_ren.c +@@ -0,0 +1,637 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: i_op_ren.c,v 1.39 2007/05/14 03:41:52 sfjro Exp $ */ ++ ++//#include <linux/fs.h> ++//#include <linux/namei.h> ++#include "aufs.h" ++ ++enum {SRC, DST}; ++struct rename_args { ++ struct dentry *hidden_dentry[2], *parent[2], *hidden_parent[2]; ++ struct aufs_nhash whlist; ++ aufs_bindex_t btgt, bstart[2]; ++ struct super_block *sb; ++ ++ unsigned int isdir:1; ++ unsigned int issamedir:1; ++ unsigned int whsrc:1; ++ unsigned int whdst:1; ++ unsigned int dlgt:1; ++} __attribute__((aligned(sizeof(long)))); ++ ++static int do_rename(struct inode *src_dir, struct dentry *src_dentry, ++ struct inode *dir, struct dentry *dentry, ++ struct rename_args *a) ++{ ++ int err, need_diropq, bycpup, rerr; ++ struct rmdir_whtmp_arg *tharg; ++ struct dentry *wh_dentry[2], *hidden_dst, *hg_parent; ++ struct inode *hidden_dir[2]; ++ aufs_bindex_t bindex, bend; ++ unsigned int flags; ++ struct lkup_args lkup = {.dlgt = a->dlgt}; ++ ++ LKTRTrace("%.*s/%.*s, %.*s/%.*s, " ++ "hd{%p, %p}, hp{%p, %p}, wh %p, btgt %d, bstart{%d, %d}, " ++ "flags{%d, %d, %d, %d}\n", ++ DLNPair(a->parent[SRC]), DLNPair(src_dentry), ++ DLNPair(a->parent[DST]), DLNPair(dentry), ++ a->hidden_dentry[SRC], a->hidden_dentry[DST], ++ a->hidden_parent[SRC], a->hidden_parent[DST], ++ &a->whlist, a->btgt, ++ a->bstart[SRC], a->bstart[DST], ++ a->isdir, a->issamedir, a->whsrc, a->whdst); ++ hidden_dir[SRC] = a->hidden_parent[SRC]->d_inode; ++ hidden_dir[DST] = a->hidden_parent[DST]->d_inode; ++ IMustLock(hidden_dir[SRC]); ++ IMustLock(hidden_dir[DST]); ++ ++ /* prepare workqueue arg */ ++ hidden_dst = NULL; ++ tharg = NULL; ++ if (a->isdir && a->hidden_dentry[DST]->d_inode) { ++ err = -ENOMEM; ++ tharg = kmalloc(sizeof(*tharg), GFP_KERNEL); ++ //tharg = NULL; ++ if (unlikely(!tharg)) ++ goto out; ++ hidden_dst = dget(a->hidden_dentry[DST]); ++ } ++ ++ wh_dentry[SRC] = wh_dentry[DST] = NULL; ++ lkup.nfsmnt = au_nfsmnt(a->sb, a->btgt); ++ /* create whiteout for src_dentry */ ++ if (a->whsrc) { ++ wh_dentry[SRC] = simple_create_wh(src_dentry, a->btgt, ++ a->hidden_parent[SRC], &lkup); ++ //wh_dentry[SRC] = ERR_PTR(-1); ++ err = PTR_ERR(wh_dentry[SRC]); ++ if (IS_ERR(wh_dentry[SRC])) ++ goto out_tharg; ++ } ++ ++ /* lookup whiteout for dentry */ ++ if (a->whdst) { ++ struct dentry *d; ++ d = lkup_wh(a->hidden_parent[DST], &dentry->d_name, &lkup); ++ //d = ERR_PTR(-1); ++ err = PTR_ERR(d); ++ if (IS_ERR(d)) ++ goto out_whsrc; ++ if (!d->d_inode) ++ dput(d); ++ else ++ wh_dentry[DST] = d; ++ } ++ ++ /* rename dentry to tmpwh */ ++ if (tharg) { ++ err = rename_whtmp(dentry, a->btgt); ++ //err = -1; ++ if (unlikely(err)) ++ goto out_whdst; ++ set_h_dptr(dentry, a->btgt, NULL); ++ err = lkup_neg(dentry, a->btgt); ++ //err = -1; ++ if (unlikely(err)) ++ goto out_whtmp; ++ a->hidden_dentry[DST] = au_h_dptr_i(dentry, a->btgt); ++ } ++ ++ /* cpup src */ ++ if (a->hidden_dentry[DST]->d_inode && a->bstart[SRC] != a->btgt) { ++ flags = au_flags_cpup(!CPUP_DTIME, a->parent[SRC]); ++ hg_parent = a->hidden_parent[SRC]->d_parent; ++ if (!(flags & CPUP_LOCKED_GHDIR) ++ && hg_parent == a->hidden_parent[DST]) ++ flags |= CPUP_LOCKED_GHDIR; ++ ++ hi_lock_child(a->hidden_dentry[SRC]->d_inode); ++ err = sio_cpup_simple(src_dentry, a->btgt, -1, flags); ++ //err = -1; // untested dir ++ i_unlock(a->hidden_dentry[SRC]->d_inode); ++ if 
(unlikely(err)) ++ goto out_whtmp; ++ } ++ ++#if 0 ++ /* clear the target ino in xino */ ++ LKTRTrace("dir %d, dst inode %p\n", a->isdir, a->hidden_dentry[DST]->d_inode); ++ if (a->isdir && a->hidden_dentry[DST]->d_inode) { ++ Dbg("here\n"); ++ err = xino_write(a->sb, a->btgt, ++ a->hidden_dentry[DST]->d_inode->i_ino, 0); ++ if (unlikely(err)) ++ goto out_whtmp; ++ } ++#endif ++ ++ /* rename by vfs_rename or cpup */ ++ need_diropq = a->isdir ++ && (wh_dentry[DST] ++ || dbdiropq(dentry) == a->btgt ++ || au_flag_test(a->sb, AuFlag_ALWAYS_DIROPQ)); ++ bycpup = 0; ++ if (dbstart(src_dentry) == a->btgt) { ++ if (need_diropq && dbdiropq(src_dentry) == a->btgt) ++ need_diropq = 0; ++ err = vfsub_rename(hidden_dir[SRC], au_h_dptr(src_dentry), ++ hidden_dir[DST], a->hidden_dentry[DST], ++ a->dlgt); ++ //err = -1; ++ } else { ++ bycpup = 1; ++ flags = au_flags_cpup(!CPUP_DTIME, a->parent[DST]); ++ hg_parent = a->hidden_parent[DST]->d_parent; ++ if (!(flags & CPUP_LOCKED_GHDIR) ++ && hg_parent == a->hidden_parent[SRC]) ++ flags |= CPUP_LOCKED_GHDIR; ++ ++ hi_lock_child(a->hidden_dentry[SRC]->d_inode); ++ set_dbstart(src_dentry, a->btgt); ++ set_h_dptr(src_dentry, a->btgt, dget(a->hidden_dentry[DST])); ++ //DbgDentry(src_dentry); ++ //DbgInode(src_dentry->d_inode); ++ err = sio_cpup_single(src_dentry, a->btgt, a->bstart[SRC], -1, ++ flags); ++ //err = -1; // untested dir ++ if (unlikely(err)) { ++ set_h_dptr(src_dentry, a->btgt, NULL); ++ set_dbstart(src_dentry, a->bstart[SRC]); ++ } ++ i_unlock(a->hidden_dentry[SRC]->d_inode); ++ } ++ if (unlikely(err)) ++ goto out_whtmp; ++ ++ /* make dir opaque */ ++ if (need_diropq) { ++ struct dentry *diropq; ++ struct inode *h_inode; ++ ++ h_inode = au_h_dptr_i(src_dentry, a->btgt)->d_inode; ++ hdir_lock(h_inode, src_dentry->d_inode, a->btgt); ++ diropq = create_diropq(src_dentry, a->btgt, a->dlgt); ++ //diropq = ERR_PTR(-1); ++ hdir_unlock(h_inode, src_dentry->d_inode, a->btgt); ++ err = PTR_ERR(diropq); ++ if (IS_ERR(diropq)) ++ goto out_rename; ++ dput(diropq); ++ } ++ ++ /* remove whiteout for dentry */ ++ if (wh_dentry[DST]) { ++ err = au_unlink_wh_dentry(hidden_dir[DST], wh_dentry[DST], ++ dentry, a->dlgt); ++ //err = -1; ++ if (unlikely(err)) ++ goto out_diropq; ++ } ++ ++ /* remove whtmp */ ++ if (tharg) { ++ if (au_is_nfs(hidden_dst->d_sb) ++ || !is_longer_wh(&a->whlist, a->btgt, ++ stosi(a->sb)->si_dirwh)) { ++ err = rmdir_whtmp(hidden_dst, &a->whlist, a->btgt, dir, ++ dentry->d_inode); ++ if (unlikely(err)) ++ Warn("failed removing whtmp dir %.*s (%d), " ++ "ignored.\n", DLNPair(hidden_dst), err); ++ } else { ++ kick_rmdir_whtmp(hidden_dst, &a->whlist, a->btgt, dir, ++ dentry->d_inode, tharg); ++ dput(hidden_dst); ++ tharg = NULL; ++ } ++ } ++ err = 0; ++ goto out_success; ++ ++#define RevertFailure(fmt, args...) do { \ ++ IOErrWhck("revert failure: " fmt " (%d, %d)\n", \ ++ ##args, err, rerr); \ ++ err = -EIO; \ ++ } while(0) ++ ++ out_diropq: ++ if (need_diropq) { ++ struct inode *h_inode; ++ ++ h_inode = au_h_dptr_i(src_dentry, a->btgt)->d_inode; ++ // i_lock simplly since inotify is not set to h_inode. 
++ hi_lock_parent(h_inode); ++ //hdir_lock(h_inode, src_dentry->d_inode, a->btgt); ++ rerr = remove_diropq(src_dentry, a->btgt, a->dlgt); ++ //rerr = -1; ++ //hdir_unlock(h_inode, src_dentry->d_inode, a->btgt); ++ i_unlock(h_inode); ++ if (rerr) ++ RevertFailure("remove diropq %.*s", ++ DLNPair(src_dentry)); ++ } ++ out_rename: ++ if (!bycpup) { ++ struct dentry *d; ++ struct qstr *name = &src_dentry->d_name; ++ d = lkup_one(name->name, a->hidden_parent[SRC], name->len, ++ &lkup); ++ //d = ERR_PTR(-1); ++ rerr = PTR_ERR(d); ++ if (IS_ERR(d)) { ++ RevertFailure("lkup_one %.*s", DLNPair(src_dentry)); ++ goto out_whtmp; ++ } ++ DEBUG_ON(d->d_inode); ++ rerr = vfsub_rename ++ (hidden_dir[DST], au_h_dptr_i(src_dentry, a->btgt), ++ hidden_dir[SRC], d, a->dlgt); ++ //rerr = -1; ++ d_drop(d); ++ dput(d); ++ //set_h_dptr(src_dentry, a->btgt, NULL); ++ if (rerr) ++ RevertFailure("rename %.*s", DLNPair(src_dentry)); ++ } else { ++ rerr = vfsub_unlink(hidden_dir[DST], a->hidden_dentry[DST], ++ a->dlgt); ++ //rerr = -1; ++ set_h_dptr(src_dentry, a->btgt, NULL); ++ set_dbstart(src_dentry, a->bstart[SRC]); ++ if (rerr) ++ RevertFailure("unlink %.*s", ++ DLNPair(a->hidden_dentry[DST])); ++ } ++ out_whtmp: ++ if (tharg) { ++ struct dentry *d; ++ struct qstr *name = &dentry->d_name; ++ LKTRLabel(here); ++ d = lkup_one(name->name, a->hidden_parent[DST], name->len, ++ &lkup); ++ //d = ERR_PTR(-1); ++ rerr = PTR_ERR(d); ++ if (IS_ERR(d)) { ++ RevertFailure("lookup %.*s", LNPair(name)); ++ goto out_whdst; ++ } ++ if (d->d_inode) { ++ d_drop(d); ++ dput(d); ++ goto out_whdst; ++ } ++ DEBUG_ON(d->d_inode); ++ rerr = vfsub_rename(hidden_dir[DST], hidden_dst, ++ hidden_dir[DST], d, a->dlgt); ++ //rerr = -1; ++ d_drop(d); ++ dput(d); ++ if (rerr) { ++ RevertFailure("rename %.*s", DLNPair(hidden_dst)); ++ goto out_whdst; ++ } ++ set_h_dptr(dentry, a->btgt, NULL); ++ set_h_dptr(dentry, a->btgt, dget(hidden_dst)); ++ } ++ out_whdst: ++ dput(wh_dentry[DST]); ++ wh_dentry[DST] = NULL; ++ out_whsrc: ++ if (wh_dentry[SRC]) { ++ LKTRLabel(here); ++ rerr = au_unlink_wh_dentry(hidden_dir[SRC], wh_dentry[SRC], ++ src_dentry, a->dlgt); ++ //rerr = -1; ++ if (rerr) ++ RevertFailure("unlink %.*s", DLNPair(wh_dentry[SRC])); ++ } ++#undef RevertFailure ++ d_drop(src_dentry); ++ bend = dbend(src_dentry); ++ for (bindex = dbstart(src_dentry); bindex <= bend; bindex++) { ++ struct dentry *hd; ++ hd = au_h_dptr_i(src_dentry, bindex); ++ if (hd) ++ d_drop(hd); ++ } ++ d_drop(dentry); ++ bend = dbend(dentry); ++ for (bindex = dbstart(dentry); bindex <= bend; bindex++) { ++ struct dentry *hd; ++ hd = au_h_dptr_i(dentry, bindex); ++ if (hd) ++ d_drop(hd); ++ } ++ au_update_dbstart(dentry); ++ if (tharg) ++ d_drop(hidden_dst); ++ out_success: ++ dput(wh_dentry[SRC]); ++ dput(wh_dentry[DST]); ++ out_tharg: ++ if (tharg) { ++ dput(hidden_dst); ++ kfree(tharg); ++ } ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++/* ++ * test if @dentry dir can be rename destination or not. ++ * success means, it is a logically empty dir. ++ */ ++static int may_rename_dstdir(struct dentry *dentry, aufs_bindex_t btgt, ++ struct aufs_nhash *whlist) ++{ ++ LKTRTrace("%.*s\n", DLNPair(dentry)); ++ ++ return test_empty(dentry, whlist); ++} ++ ++/* ++ * test if @dentry dir can be rename source or not. ++ * if it can, return 0 and @children is filled. ++ * success means, ++ * - or, it is a logically empty dir. ++ * - or, it exists on writable branch and has no children including whiteouts ++ * on the lower branch. 
++ */ ++static int may_rename_srcdir(struct dentry *dentry, aufs_bindex_t btgt) ++{ ++ int err; ++ aufs_bindex_t bstart; ++ ++ LKTRTrace("%.*s\n", DLNPair(dentry)); ++ ++ bstart = dbstart(dentry); ++ if (bstart != btgt) { ++ struct aufs_nhash *whlist; ++ ++ whlist = nhash_new(GFP_KERNEL); ++ err = PTR_ERR(whlist); ++ if (IS_ERR(whlist)) ++ goto out; ++ err = test_empty(dentry, whlist); ++ nhash_del(whlist); ++ goto out; ++ } ++ ++ if (bstart == dbtaildir(dentry)) ++ return 0; /* success */ ++ ++ err = au_test_empty_lower(dentry); ++ ++ out: ++ if (/* unlikely */(err == -ENOTEMPTY)) ++ err = -EXDEV; ++ TraceErr(err); ++ return err; ++} ++ ++int aufs_rename(struct inode *src_dir, struct dentry *src_dentry, ++ struct inode *dir, struct dentry *dentry) ++{ ++ int err, do_dt_dstdir; ++ aufs_bindex_t bend, bindex; ++ struct inode *inode, *dirs[2]; ++ enum {PARENT, CHILD}; ++ /* reduce stack space */ ++ struct { ++ struct rename_args a; ++ struct dtime dt[2][2]; ++ } *p; ++ ++ LKTRTrace("i%lu, %.*s, i%lu, %.*s\n", ++ src_dir->i_ino, DLNPair(src_dentry), ++ dir->i_ino, DLNPair(dentry)); ++ IMustLock(src_dir); ++ IMustLock(dir); ++ /* braces are added to stop a warning */ ++ if (dentry->d_inode) { ++ IMustLock(dentry->d_inode); ++ } ++ ++ err = -ENOMEM; ++ BUILD_BUG_ON(sizeof(*p) > PAGE_SIZE); ++ p = kmalloc(sizeof(*p), GFP_KERNEL); ++ if (unlikely(!p)) ++ goto out; ++ ++ err = -ENOTDIR; ++ p->a.sb = src_dentry->d_sb; ++ inode = src_dentry->d_inode; ++ p->a.isdir = !!S_ISDIR(inode->i_mode); ++ if (unlikely(p->a.isdir && dentry->d_inode ++ && !S_ISDIR(dentry->d_inode->i_mode))) ++ goto out_free; ++ ++ aufs_read_and_write_lock2(dentry, src_dentry, p->a.isdir); ++ p->a.dlgt = !!need_dlgt(p->a.sb); ++ p->a.parent[SRC] = p->a.parent[DST] = dentry->d_parent; ++ p->a.issamedir = (src_dir == dir); ++ if (p->a.issamedir) ++ di_write_lock_parent(p->a.parent[DST]); ++ else { ++ p->a.parent[SRC] = src_dentry->d_parent; ++ di_write_lock2_parent(p->a.parent[SRC], p->a.parent[DST], ++ /*isdir*/1); ++ } ++ ++ /* which branch we process */ ++ p->a.bstart[DST] = dbstart(dentry); ++ p->a.btgt = err = wr_dir(dentry, 1, src_dentry, /*force_btgt*/-1, ++ /*do_lock_srcdir*/0); ++ if (unlikely(err < 0)) ++ goto out_unlock; ++ ++ /* are they available to be renamed */ ++ err = 0; ++ nhash_init(&p->a.whlist); ++ if (p->a.isdir && dentry->d_inode) { ++ set_dbstart(dentry, p->a.bstart[DST]); ++ err = may_rename_dstdir(dentry, p->a.btgt, &p->a.whlist); ++ set_dbstart(dentry, p->a.btgt); ++ } ++ p->a.hidden_dentry[DST] = au_h_dptr(dentry); ++ if (unlikely(err)) ++ goto out_unlock; ++ //todo: minor optimize, their sb may be same while their bindex differs. ++ p->a.bstart[SRC] = dbstart(src_dentry); ++ p->a.hidden_dentry[SRC] = au_h_dptr(src_dentry); ++ if (p->a.isdir) { ++ err = may_rename_srcdir(src_dentry, p->a.btgt); ++ if (unlikely(err)) ++ goto out_children; ++ } ++ ++ /* prepare the writable parent dir on the same branch */ ++ err = wr_dir_need_wh(src_dentry, p->a.isdir, &p->a.btgt, ++ p->a.issamedir ? NULL : p->a.parent[DST]); ++ if (unlikely(err < 0)) ++ goto out_children; ++ p->a.whsrc = !!err; ++ p->a.whdst = (p->a.bstart[DST] == p->a.btgt); ++ if (!p->a.whdst) { ++ err = cpup_dirs(dentry, p->a.btgt, ++ p->a.issamedir ? 
NULL : p->a.parent[SRC]); ++ if (unlikely(err)) ++ goto out_children; ++ } ++ ++ p->a.hidden_parent[SRC] = au_h_dptr_i(p->a.parent[SRC], p->a.btgt); ++ p->a.hidden_parent[DST] = au_h_dptr_i(p->a.parent[DST], p->a.btgt); ++ dirs[0] = src_dir; ++ dirs[1] = dir; ++ hdir_lock_rename(p->a.hidden_parent, dirs, p->a.btgt, p->a.issamedir); ++ ++ /* store timestamps to be revertible */ ++ dtime_store(p->dt[PARENT] + SRC, p->a.parent[SRC], ++ p->a.hidden_parent[SRC]); ++ if (!p->a.issamedir) ++ dtime_store(p->dt[PARENT] + DST, p->a.parent[DST], ++ p->a.hidden_parent[DST]); ++ do_dt_dstdir = 0; ++ if (p->a.isdir) { ++ dtime_store(p->dt[CHILD] + SRC, src_dentry, ++ p->a.hidden_dentry[SRC]); ++ if (p->a.hidden_dentry[DST]->d_inode) { ++ do_dt_dstdir = 1; ++ dtime_store(p->dt[CHILD] + DST, dentry, ++ p->a.hidden_dentry[DST]); ++ } ++ } ++ ++ err = do_rename(src_dir, src_dentry, dir, dentry, &p->a); ++ if (unlikely(err)) ++ goto out_dt; ++ hdir_unlock_rename(p->a.hidden_parent, dirs, p->a.btgt, p->a.issamedir); ++ ++ /* update dir attributes */ ++ dir->i_version++; ++ if (p->a.isdir) ++ au_cpup_attr_nlink(dir); ++ if (ibstart(dir) == p->a.btgt) ++ au_cpup_attr_timesizes(dir); ++ ++ if (!p->a.issamedir) { ++ src_dir->i_version++; ++ if (p->a.isdir) ++ au_cpup_attr_nlink(src_dir); ++ if (ibstart(src_dir) == p->a.btgt) ++ au_cpup_attr_timesizes(src_dir); ++ } ++ ++ // is this updating defined in POSIX? ++ if (unlikely(p->a.isdir)) { ++ //i_lock(inode); ++ au_cpup_attr_timesizes(inode); ++ //i_unlock(inode); ++ } ++ ++#if 0 ++ d_drop(src_dentry); ++#else ++ /* dput/iput all lower dentries */ ++ set_dbwh(src_dentry, -1); ++ bend = dbend(src_dentry); ++ for (bindex = p->a.btgt + 1; bindex <= bend; bindex++) { ++ struct dentry *hd; ++ hd = au_h_dptr_i(src_dentry, bindex); ++ if (hd) ++ set_h_dptr(src_dentry, bindex, NULL); ++ } ++ set_dbend(src_dentry, p->a.btgt); ++ ++ bend = ibend(inode); ++ for (bindex = p->a.btgt + 1; bindex <= bend; bindex++) { ++ struct inode *hi; ++ hi = au_h_iptr_i(inode, bindex); ++ if (hi) ++ set_h_iptr(inode, bindex, NULL, 0); ++ } ++ set_ibend(inode, p->a.btgt); ++#endif ++ ++#if 0 ++ //au_debug_on(); ++ //DbgDentry(dentry); ++ //DbgInode(dentry->d_inode); ++ //au_debug_off(); ++ inode = dentry->d_inode; ++ if (inode) { ++ aufs_bindex_t bindex, bend; ++ struct dentry *hd; ++ bend = dbend(dentry); ++ for (bindex = dbstart(dentry); bindex <= bend; bindex++) { ++ hd = au_h_dptr_i(dentry, bindex); ++ if (hd && hd->d_inode) ++ xino_write0(p->a.sb, bindex, hd->d_inode->i_ino); ++ /* ignore this error */ ++ } ++ } ++#endif ++ ++ goto out_children; /* success */ ++ ++ out_dt: ++ dtime_revert(p->dt[PARENT] + SRC, ++ p->a.hidden_parent[SRC]->d_parent ++ == p->a.hidden_parent[DST]); ++ if (!p->a.issamedir) ++ dtime_revert(p->dt[PARENT] + DST, ++ p->a.hidden_parent[DST]->d_parent ++ == p->a.hidden_parent[SRC]); ++ if (p->a.isdir && err != -EIO) { ++ struct dentry *hd; ++ ++ hd = p->dt[CHILD][SRC].dt_h_dentry; ++ hi_lock_child(hd->d_inode); ++ dtime_revert(p->dt[CHILD] + SRC, 1); ++ i_unlock(hd->d_inode); ++ if (do_dt_dstdir) { ++ hd = p->dt[CHILD][DST].dt_h_dentry; ++ hi_lock_child(hd->d_inode); ++ dtime_revert(p->dt[CHILD] + DST, 1); ++ i_unlock(hd->d_inode); ++ } ++ } ++ hdir_unlock_rename(p->a.hidden_parent, dirs, p->a.btgt, p->a.issamedir); ++ out_children: ++ nhash_fin(&p->a.whlist); ++ out_unlock: ++ //if (unlikely(err /* && p->a.isdir */)) { ++ if (unlikely(err && p->a.isdir)) { ++ au_update_dbstart(dentry); ++ d_drop(dentry); ++ } ++ if (p->a.issamedir) ++ 
di_write_unlock(p->a.parent[DST]); ++ else ++ di_write_unlock2(p->a.parent[SRC], p->a.parent[DST]); ++ aufs_read_and_write_unlock2(dentry, src_dentry); ++ out_free: ++ kfree(p); ++ out: ++ TraceErr(err); ++ return err; ++} +diff --git a/fs/aufs/iinfo.c b/fs/aufs/iinfo.c +new file mode 100755 +index 0000000..9efbd38 +--- /dev/null ++++ b/fs/aufs/iinfo.c +@@ -0,0 +1,286 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: iinfo.c,v 1.31 2007/05/14 03:41:52 sfjro Exp $ */ ++ ++//#include <linux/mm.h> ++#include "aufs.h" ++ ++struct aufs_iinfo *itoii(struct inode *inode) ++{ ++ struct aufs_iinfo *iinfo; ++ ++ iinfo = &(container_of(inode, struct aufs_icntnr, vfs_inode)->iinfo); ++ /* bad_inode case */ ++ if (unlikely(!iinfo->ii_hinode)) ++ return NULL; ++ DEBUG_ON(!iinfo->ii_hinode ++ /* || stosi(inode->i_sb)->si_bend < iinfo->ii_bend */ ++ || iinfo->ii_bend < iinfo->ii_bstart); ++ return iinfo; ++} ++ ++aufs_bindex_t ibstart(struct inode *inode) ++{ ++ IiMustAnyLock(inode); ++ return itoii(inode)->ii_bstart; ++} ++ ++aufs_bindex_t ibend(struct inode *inode) ++{ ++ IiMustAnyLock(inode); ++ return itoii(inode)->ii_bend; ++} ++ ++struct aufs_vdir *ivdir(struct inode *inode) ++{ ++ IiMustAnyLock(inode); ++ DEBUG_ON(!S_ISDIR(inode->i_mode)); ++ return itoii(inode)->ii_vdir; ++} ++ ++struct inode *au_h_iptr_i(struct inode *inode, aufs_bindex_t bindex) ++{ ++ struct inode *hidden_inode; ++ ++ IiMustAnyLock(inode); ++ DEBUG_ON(bindex < 0 || ibend(inode) < bindex); ++ hidden_inode = itoii(inode)->ii_hinode[0 + bindex].hi_inode; ++ DEBUG_ON(hidden_inode && atomic_read(&hidden_inode->i_count) <= 0); ++ return hidden_inode; ++} ++ ++struct inode *au_h_iptr(struct inode *inode) ++{ ++ return au_h_iptr_i(inode, ibstart(inode)); ++} ++ ++aufs_bindex_t itoid_index(struct inode *inode, aufs_bindex_t bindex) ++{ ++ IiMustAnyLock(inode); ++ DEBUG_ON(bindex < 0 ++ || ibend(inode) < bindex ++ || !itoii(inode)->ii_hinode[0 + bindex].hi_inode); ++ return itoii(inode)->ii_hinode[0 + bindex].hi_id; ++} ++ ++// hard/soft set ++void set_ibstart(struct inode *inode, aufs_bindex_t bindex) ++{ ++ struct aufs_iinfo *iinfo = itoii(inode); ++ struct inode *h_inode; ++ ++ IiMustWriteLock(inode); ++ DEBUG_ON(sbend(inode->i_sb) < bindex); ++ iinfo->ii_bstart = bindex; ++ h_inode = iinfo->ii_hinode[bindex + 0].hi_inode; ++ if (h_inode) ++ au_cpup_igen(inode, h_inode); ++} ++ ++void set_ibend(struct inode *inode, aufs_bindex_t bindex) ++{ ++ IiMustWriteLock(inode); ++ DEBUG_ON(sbend(inode->i_sb) < bindex ++ || bindex < ibstart(inode)); ++ itoii(inode)->ii_bend = bindex; ++} ++ ++void set_ivdir(struct inode *inode, struct aufs_vdir *vdir) ++{ ++ IiMustWriteLock(inode); ++ DEBUG_ON(!S_ISDIR(inode->i_mode) ++ || (itoii(inode)->ii_vdir && vdir)); ++ itoii(inode)->ii_vdir = 
vdir; ++} ++ ++void aufs_hiput(struct aufs_hinode *hinode) ++{ ++ if (unlikely(hinode->hi_notify)) ++ do_free_hinotify(hinode); ++ if (hinode->hi_inode) ++ iput(hinode->hi_inode); ++} ++ ++unsigned int au_hi_flags(struct inode *inode, int isdir) ++{ ++ unsigned int flags; ++ struct super_block *sb = inode->i_sb; ++ ++ flags = 0; ++ if (au_flag_test(sb, AuFlag_XINO)) ++ flags = AUFS_HI_XINO; ++ if (unlikely(isdir && au_flag_test(sb, AuFlag_UDBA_INOTIFY))) ++ flags |= AUFS_HI_NOTIFY; ++ return flags; ++} ++ ++void set_h_iptr(struct inode *inode, aufs_bindex_t bindex, ++ struct inode *h_inode, unsigned int flags) ++{ ++ struct aufs_hinode *hinode; ++ struct inode *hi; ++ struct aufs_iinfo *iinfo = itoii(inode); ++ ++ LKTRTrace("i%lu, b%d, hi%lu, flags 0x%x\n", ++ inode->i_ino, bindex, h_inode ? h_inode->i_ino : 0, flags); ++ IiMustWriteLock(inode); ++ hinode = iinfo->ii_hinode + bindex; ++ hi = hinode->hi_inode; ++ DEBUG_ON(bindex < ibstart(inode) || ibend(inode) < bindex ++ || (h_inode && atomic_read(&h_inode->i_count) <= 0) ++ || (h_inode && hi)); ++ ++ if (hi) ++ aufs_hiput(hinode); ++ hinode->hi_inode = h_inode; ++ if (h_inode) { ++ int err; ++ struct super_block *sb = inode->i_sb; ++ ++ if (bindex == iinfo->ii_bstart) ++ au_cpup_igen(inode, h_inode); ++ hinode->hi_id = sbr_id(sb, bindex); ++ if (flags & AUFS_HI_XINO) { ++ struct xino xino = { ++ .ino = inode->i_ino, ++ //.h_gen = h_inode->i_generation ++ }; ++ //WARN_ON(xino.h_gen == AuXino_INVALID_HGEN); ++ err = xino_write(sb, bindex, h_inode->i_ino, &xino); ++ if (unlikely(err)) { ++ IOErr1("failed xino_write() %d, force noxino\n", ++ err); ++ au_flag_clr(sb, AuFlag_XINO); ++ } ++ } ++ if (flags & AUFS_HI_NOTIFY) { ++ err = alloc_hinotify(hinode, inode, h_inode); ++ if (unlikely(err)) ++ IOErr1("alloc_hinotify() %d\n", err); ++ else { ++ /* braces are added to stop a warning */ ++ DEBUG_ON(!hinode->hi_notify); ++ } ++ } ++ } ++} ++ ++void au_update_iigen(struct inode *inode) ++{ ++ //IiMustWriteLock(inode); ++ DEBUG_ON(!inode->i_sb); ++ atomic_set(&itoii(inode)->ii_generation, au_sigen(inode->i_sb)); ++} ++ ++/* it may be called at remount time, too */ ++void au_update_brange(struct inode *inode, int do_put_zero) ++{ ++ struct aufs_iinfo *iinfo; ++ ++ LKTRTrace("i%lu, %d\n", inode->i_ino, do_put_zero); ++ IiMustWriteLock(inode); ++ ++ iinfo = itoii(inode); ++ if (unlikely(!iinfo) || iinfo->ii_bstart < 0) ++ return; ++ ++ if (do_put_zero) { ++ aufs_bindex_t bindex; ++ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; ++ bindex++) { ++ struct inode *h_i; ++ h_i = iinfo->ii_hinode[0 + bindex].hi_inode; ++ if (h_i && !h_i->i_nlink) ++ set_h_iptr(inode, bindex, NULL, 0); ++ } ++ } ++ ++ iinfo->ii_bstart = -1; ++ while (++iinfo->ii_bstart <= iinfo->ii_bend) ++ if (iinfo->ii_hinode[0 + iinfo->ii_bstart].hi_inode) ++ break; ++ if (iinfo->ii_bstart > iinfo->ii_bend) { ++ iinfo->ii_bend = iinfo->ii_bstart = -1; ++ return; ++ } ++ ++ iinfo->ii_bend++; ++ while (0 <= --iinfo->ii_bend) ++ if (iinfo->ii_hinode[0 + iinfo->ii_bend].hi_inode) ++ break; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int au_iinfo_init(struct inode *inode) ++{ ++ struct aufs_iinfo *iinfo; ++ struct super_block *sb; ++ int nbr, i; ++ ++ sb = inode->i_sb; ++ DEBUG_ON(!sb); ++ iinfo = &(container_of(inode, struct aufs_icntnr, vfs_inode)->iinfo); ++ DEBUG_ON(iinfo->ii_hinode); ++ nbr = sbend(sb) + 1; ++ if (unlikely(!nbr)) ++ nbr++; ++ iinfo->ii_hinode = kcalloc(nbr, sizeof(*iinfo->ii_hinode), GFP_KERNEL); ++ 
//iinfo->ii_hinode = NULL; ++ if (iinfo->ii_hinode) { ++ for (i = 0; i < nbr; i++) ++ iinfo->ii_hinode[i].hi_id = -1; ++ atomic_set(&iinfo->ii_generation, au_sigen(sb)); ++ rw_init_nolock(&iinfo->ii_rwsem); ++ iinfo->ii_bstart = -1; ++ iinfo->ii_bend = -1; ++ iinfo->ii_vdir = NULL; ++ return 0; ++ } ++ return -ENOMEM; ++} ++ ++void au_iinfo_fin(struct inode *inode) ++{ ++ struct aufs_iinfo *iinfo; ++ ++ iinfo = itoii(inode); ++ /* bad_inode case */ ++ if (unlikely(!iinfo)) ++ return; ++ ++ if (unlikely(iinfo->ii_vdir)) ++ free_vdir(iinfo->ii_vdir); ++ ++ if (iinfo->ii_bstart >= 0) { ++ aufs_bindex_t bend; ++ struct aufs_hinode *hi; ++ hi = iinfo->ii_hinode + iinfo->ii_bstart; ++ bend = iinfo->ii_bend; ++ while (iinfo->ii_bstart++ <= bend) { ++ if (hi->hi_inode) ++ aufs_hiput(hi); ++ hi++; ++ } ++ //iinfo->ii_bstart = iinfo->ii_bend = -1; ++ } ++ ++ kfree(iinfo->ii_hinode); ++ //iinfo->ii_hinode = NULL; ++} +diff --git a/fs/aufs/inode.c b/fs/aufs/inode.c +new file mode 100755 +index 0000000..f18b5d8 +--- /dev/null ++++ b/fs/aufs/inode.c +@@ -0,0 +1,339 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: inode.c,v 1.22 2007/05/07 03:44:35 sfjro Exp $ */ ++ ++#include "aufs.h" ++ ++int au_refresh_hinode(struct inode *inode, struct dentry *dentry) ++{ ++ int err, new_sz, update, isdir; ++ struct inode *first; ++ struct aufs_hinode *p, *q, tmp; ++ struct super_block *sb; ++ struct aufs_iinfo *iinfo; ++ aufs_bindex_t bindex, bend, new_bindex; ++ unsigned int flags; ++ ++ LKTRTrace("%.*s\n", DLNPair(dentry)); ++ IiMustWriteLock(inode); ++ ++ err = -ENOMEM; ++ sb = dentry->d_sb; ++ bend = sbend(sb); ++ new_sz = sizeof(*iinfo->ii_hinode) * (bend + 1); ++ iinfo = itoii(inode); ++ p = au_kzrealloc(iinfo->ii_hinode, sizeof(*p) * (iinfo->ii_bend + 1), ++ new_sz, GFP_KERNEL); ++ //p = NULL; ++ if (unlikely(!p)) ++ goto out; ++ ++ iinfo->ii_hinode = p; ++ err = 0; ++ update = 0; ++ p = iinfo->ii_hinode + iinfo->ii_bstart; ++ first = p->hi_inode; ++ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; ++ bindex++, p++) { ++ if (unlikely(!p->hi_inode)) ++ continue; ++ ++ new_bindex = find_brindex(sb, p->hi_id); ++ if (new_bindex == bindex) ++ continue; ++ if (new_bindex < 0) { ++ update++; ++ aufs_hiput(p); ++ p->hi_inode = NULL; ++ continue; ++ } ++ ++ if (new_bindex < iinfo->ii_bstart) ++ iinfo->ii_bstart = new_bindex; ++ if (iinfo->ii_bend < new_bindex) ++ iinfo->ii_bend = new_bindex; ++ /* swap two hidden inode, and loop again */ ++ q = iinfo->ii_hinode + new_bindex; ++ tmp = *q; ++ *q = *p; ++ *p = tmp; ++ if (tmp.hi_inode) { ++ bindex--; ++ p--; ++ } ++ } ++ ++ isdir = S_ISDIR(inode->i_mode); ++ flags = au_hi_flags(inode, isdir); ++ bend = dbend(dentry); ++ for (bindex = dbstart(dentry); bindex <= bend; bindex++) { ++ struct inode *hi; 
++ struct dentry *hd; ++ ++ hd = au_h_dptr_i(dentry, bindex); ++ if (!hd || !hd->d_inode) ++ continue; ++ ++ if (iinfo->ii_bstart <= bindex && bindex <= iinfo->ii_bend) { ++ hi = au_h_iptr_i(inode, bindex); ++ if (hi) { ++ if (hi == hd->d_inode) ++ continue; ++ //Dbg("here\n"); ++ err = -ESTALE; ++ break; ++ } ++ } ++ if (bindex < iinfo->ii_bstart) ++ iinfo->ii_bstart = bindex; ++ if (iinfo->ii_bend < bindex) ++ iinfo->ii_bend = bindex; ++ set_h_iptr(inode, bindex, igrab(hd->d_inode), flags); ++ update++; ++ } ++ ++ bend = iinfo->ii_bend; ++ p = iinfo->ii_hinode; ++ for (bindex = 0; bindex <= bend; bindex++, p++) ++ if (p->hi_inode) { ++ iinfo->ii_bstart = bindex; ++ break; ++ } ++ p = iinfo->ii_hinode + bend; ++ for (bindex = bend; bindex > iinfo->ii_bstart; bindex--, p--) ++ if (p->hi_inode) { ++ iinfo->ii_bend = bindex; ++ break; ++ } ++ DEBUG_ON(iinfo->ii_bstart > bend || iinfo->ii_bend < 0); ++ ++ if (unlikely(err)) ++ goto out; ++ ++ if (1 || first != au_h_iptr(inode)) ++ au_cpup_attr_all(inode); ++ if (update && isdir) ++ inode->i_version++; ++ au_update_iigen(inode); ++ ++ out: ++ //au_debug_on(); ++ TraceErr(err); ++ //au_debug_off(); ++ return err; ++} ++ ++static int set_inode(struct inode *inode, struct dentry *dentry) ++{ ++ int err, isdir; ++ struct dentry *hidden_dentry; ++ struct inode *hidden_inode; ++ umode_t mode; ++ aufs_bindex_t bindex, bstart, btail; ++ struct aufs_iinfo *iinfo; ++ unsigned int flags; ++ ++ LKTRTrace("i%lu, %.*s\n", inode->i_ino, DLNPair(dentry)); ++ DEBUG_ON(!(inode->i_state & I_NEW)); ++ IiMustWriteLock(inode); ++ hidden_dentry = au_h_dptr(dentry); ++ DEBUG_ON(!hidden_dentry); ++ hidden_inode = hidden_dentry->d_inode; ++ DEBUG_ON(!hidden_inode); ++ ++ err = 0; ++ isdir = 0; ++ bstart = dbstart(dentry); ++ mode = hidden_inode->i_mode; ++ switch (mode & S_IFMT) { ++ case S_IFREG: ++ btail = dbtail(dentry); ++ break; ++ case S_IFDIR: ++ isdir = 1; ++ btail = dbtaildir(dentry); ++ inode->i_op = &aufs_dir_iop; ++ inode->i_fop = &aufs_dir_fop; ++ break; ++ case S_IFLNK: ++ btail = dbtail(dentry); ++ inode->i_op = &aufs_symlink_iop; ++ break; ++ case S_IFBLK: ++ case S_IFCHR: ++ case S_IFIFO: ++ case S_IFSOCK: ++ btail = dbtail(dentry); ++ init_special_inode(inode, mode, hidden_inode->i_rdev); ++ break; ++ default: ++ IOErr("Unknown file type 0%o\n", mode); ++ err = -EIO; ++ goto out; ++ } ++ ++ flags = au_hi_flags(inode, isdir); ++ iinfo = itoii(inode); ++ iinfo->ii_bstart = bstart; ++ iinfo->ii_bend = btail; ++ for (bindex = bstart; bindex <= btail; bindex++) { ++ hidden_dentry = au_h_dptr_i(dentry, bindex); ++ if (!hidden_dentry) ++ continue; ++ DEBUG_ON(!hidden_dentry->d_inode); ++ set_h_iptr(inode, bindex, igrab(hidden_dentry->d_inode), flags); ++ } ++ au_cpup_attr_all(inode); ++ ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++/* successful returns with iinfo write_locked */ ++//todo: return with unlocked? ++static int reval_inode(struct inode *inode, struct dentry *dentry, int *matched) ++{ ++ int err; ++ struct inode *h_inode, *h_dinode; ++ aufs_bindex_t bindex, bend; ++ //const int udba = !au_flag_test(inode->i_sb, AuFlag_UDBA_NONE); ++ ++ LKTRTrace("i%lu, %.*s\n", inode->i_ino, DLNPair(dentry)); ++ ++ *matched = 0; ++ ++ /* ++ * before this function, if aufs got any iinfo lock, it must be only ++ * one, the parent dir. ++ * it can happen by UDBA and the obsoleted inode number. 
++ */ ++ err = -EIO; ++ if (unlikely(inode->i_ino == parent_ino(dentry))) ++ goto out; ++ ++ h_dinode = au_h_dptr(dentry)->d_inode; ++ hi_lock_child(inode); // bad name, this is not a hidden inode. ++ ii_write_lock_new(inode); ++ bend = ibend(inode); ++ for (bindex = ibstart(inode); bindex <= bend; bindex++) { ++ h_inode = au_h_iptr_i(inode, bindex); ++ if (h_inode && h_inode == h_dinode) { ++ //&& (ibs != bstart || !au_test_higen(inode, h_inode))); ++ *matched = 1; ++ err = 0; ++ if (unlikely(au_iigen(inode) != au_digen(dentry))) ++ err = au_refresh_hinode(inode, dentry); ++ break; ++ } ++ } ++ i_unlock(inode); ++ if (unlikely(err)) ++ ii_write_unlock(inode); ++ ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++/* successful returns with iinfo write_locked */ ++//todo: return with unlocked? ++struct inode *au_new_inode(struct dentry *dentry) ++{ ++ struct inode *inode, *h_inode; ++ struct dentry *h_dentry; ++ ino_t h_ino; ++ struct super_block *sb; ++ int err, match; ++ aufs_bindex_t bstart; ++ struct xino xino; ++ ++ LKTRTrace("%.*s\n", DLNPair(dentry)); ++ sb = dentry->d_sb; ++ h_dentry = au_h_dptr(dentry); ++ DEBUG_ON(!h_dentry); ++ h_inode = h_dentry->d_inode; ++ DEBUG_ON(!h_inode); ++ ++ bstart = dbstart(dentry); ++ h_ino = h_inode->i_ino; ++ err = xino_read(sb, bstart, h_ino, &xino); ++ //err = -1; ++ inode = ERR_PTR(err); ++ if (unlikely(err)) ++ goto out; ++ new_ino: ++ if (!xino.ino) { ++ xino.ino = xino_new_ino(sb); ++ if (!xino.ino) { ++ inode = ERR_PTR(-EIO); ++ goto out; ++ } ++ } ++ ++ LKTRTrace("i%lu\n", xino.ino); ++ err = -ENOMEM; ++ inode = iget_locked(sb, xino.ino); ++ if (unlikely(!inode)) ++ goto out; ++ err = PTR_ERR(inode); ++ if (IS_ERR(inode)) ++ goto out; ++ err = -ENOMEM; ++ if (unlikely(is_bad_inode(inode))) ++ goto out_iput; ++ ++ LKTRTrace("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW)); ++ if (inode->i_state & I_NEW) { ++ sb->s_op->read_inode(inode); ++ if (!is_bad_inode(inode)) { ++ ii_write_lock_new(inode); ++ err = set_inode(inode, dentry); ++ //err = -1; ++ } ++ unlock_new_inode(inode); ++ if (!err) ++ goto out; /* success */ ++ ii_write_unlock(inode); ++ goto out_iput; ++ } else { ++ err = reval_inode(inode, dentry, &match); ++ if (!err) ++ goto out; /* success */ ++ else if (match) ++ goto out_iput; ++ } ++ ++ Warn1("broken ino, b%d, %.*s/%.*s, hi%lu, i%lu. Try udba=inotify.\n", ++ bstart, DLNPair(dentry->d_parent), DLNPair(dentry), h_ino, ++ xino.ino); ++ xino.ino = 0; ++ err = xino_write0(sb, bstart, h_ino); ++ if (!err) { ++ iput(inode); ++ goto new_ino; ++ } ++ ++ out_iput: ++ iput(inode); ++ inode = ERR_PTR(err); ++ out: ++ TraceErrPtr(inode); ++ return inode; ++} +diff --git a/fs/aufs/inode.h b/fs/aufs/inode.h +new file mode 100755 +index 0000000..b001ac3 +--- /dev/null ++++ b/fs/aufs/inode.h +@@ -0,0 +1,377 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: inode.h,v 1.32 2007/05/14 03:41:52 sfjro Exp $ */ ++ ++#ifndef __AUFS_INODE_H__ ++#define __AUFS_INODE_H__ ++ ++#ifdef __KERNEL__ ++ ++#include <linux/fs.h> ++#include <linux/inotify.h> ++#include <linux/version.h> ++#include <linux/aufs_type.h> ++#include "misc.h" ++#include "vfsub.h" ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,18) ++#else ++struct inotify_watch {}; ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct aufs_hinotify { ++ struct inotify_watch hin_watch; ++ struct inode *hin_aufs_inode; /* no get/put */ ++}; ++ ++struct aufs_hinode { ++ struct inode *hi_inode; ++ aufs_bindex_t hi_id; ++ struct aufs_hinotify *hi_notify; ++}; ++ ++struct aufs_vdir; ++struct aufs_iinfo { ++ atomic_t ii_generation; ++ struct super_block *ii_hsb1; /* no get/put */ ++ ++ struct aufs_rwsem ii_rwsem; ++ aufs_bindex_t ii_bstart, ii_bend; ++ struct aufs_hinode *ii_hinode; ++ struct aufs_vdir *ii_vdir; ++}; ++ ++struct aufs_icntnr { ++ struct aufs_iinfo iinfo; ++ struct inode vfs_inode; ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* inode.c */ ++int au_refresh_hinode(struct inode *inode, struct dentry *dentry); ++struct inode *au_new_inode(struct dentry *dentry); ++ ++/* i_op.c */ ++extern struct inode_operations aufs_iop, aufs_symlink_iop, aufs_dir_iop; ++int wr_dir(struct dentry *dentry, int negative, struct dentry *src_dentry, ++ aufs_bindex_t force_btgt, int do_lock_srcdir); ++ ++/* i_op_del.c */ ++int wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup, ++ struct dentry *locked); ++ ++/* iinfo.c */ ++struct aufs_iinfo *itoii(struct inode *inode); ++aufs_bindex_t ibstart(struct inode *inode); ++aufs_bindex_t ibend(struct inode *inode); ++struct aufs_vdir *ivdir(struct inode *inode); ++struct inode *au_h_iptr_i(struct inode *inode, aufs_bindex_t bindex); ++struct inode *au_h_iptr(struct inode *inode); ++aufs_bindex_t itoid_index(struct inode *inode, aufs_bindex_t bindex); ++ ++void set_ibstart(struct inode *inode, aufs_bindex_t bindex); ++void set_ibend(struct inode *inode, aufs_bindex_t bindex); ++void set_ivdir(struct inode *inode, struct aufs_vdir *vdir); ++void aufs_hiput(struct aufs_hinode *hinode); ++#define AUFS_HI_XINO 1 ++#define AUFS_HI_NOTIFY 2 ++unsigned int au_hi_flags(struct inode *inode, int isdir); ++void set_h_iptr(struct inode *inode, aufs_bindex_t bindex, ++ struct inode *h_inode, unsigned int flags); ++void au_update_iigen(struct inode *inode); ++void au_update_brange(struct inode *inode, int do_put_zero); ++ ++int au_iinfo_init(struct inode *inode); ++void au_iinfo_fin(struct inode *inode); ++ ++/* plink.c */ ++#ifdef CONFIG_AUFS_DEBUG ++void au_list_plink(struct super_block *sb); ++#else ++static inline void au_list_plink(struct super_block *sb) ++{ ++ /* nothing */ ++} ++#endif ++int au_is_plinked(struct super_block *sb, struct inode *inode); ++struct dentry *lkup_plink(struct super_block *sb, aufs_bindex_t bindex, ++ struct inode *inode); ++void append_plink(struct super_block *sb, struct inode *inode, ++ struct dentry *h_dentry, aufs_bindex_t bindex); ++void au_put_plink(struct super_block *sb); ++void half_refresh_plink(struct super_block *sb, aufs_bindex_t br_id); ++ ++/* 
---------------------------------------------------------------------- */ ++ ++/* lock subclass for hidden inode */ ++/* default MAX_LOCKDEP_SUBCLASSES(8) is not enough */ ++// todo: reduce it by dcsub. ++enum { ++ AuLsc_Begin = I_MUTEX_QUOTA, ++ AuLsc_HI_GPARENT, /* setattr with inotify */ ++ AuLsc_HI_PARENT, /* hidden inode, parent first */ ++ AuLsc_HI_CHILD, ++ AuLsc_HI_PARENT2, /* copyup dirs */ ++ AuLsc_HI_CHILD2, ++ AuLsc_End ++}; ++ ++/* simple abstraction */ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) ++static inline void i_lock(struct inode *i) ++{ ++ down(&i->i_sem); ++} ++ ++static inline void i_unlock(struct inode *i) ++{ ++ up(&i->i_sem); ++} ++ ++static inline int i_trylock(struct inode *i) ++{ ++ return down_trylock(&i->i_sem); ++} ++ ++static inline void hi_lock(struct inode *i, unsigned int lsc) ++{ ++ i_lock(i); ++} ++ ++#define IMustLock(i) DEBUG_ON(!down_trylock(&(i)->i_sem)) ++#else ++static inline void i_lock(struct inode *i) ++{ ++ mutex_lock(&i->i_mutex); ++} ++ ++static inline void i_unlock(struct inode *i) ++{ ++ mutex_unlock(&i->i_mutex); ++} ++ ++static inline int i_trylock(struct inode *i) ++{ ++ return mutex_trylock(&i->i_mutex); ++} ++ ++static inline void hi_lock(struct inode *i, unsigned int lsc) ++{ ++ mutex_lock_nested(&i->i_mutex, lsc); ++} ++ ++#define IMustLock(i) MtxMustLock(&(i)->i_mutex) ++#endif ++ ++/* ++ * hi_lock_gparent, hi_lock_parent, hi_lock_parent2, hi_lock_child, ++ * hi_lock_child2, hi_lock_whplink ++ */ ++#define LockFunc(name, lsc) \ ++static inline void hi_lock_##name(struct inode *h_i) \ ++{hi_lock(h_i, AuLsc_HI_##lsc);} ++ ++LockFunc(gparent, GPARENT); ++LockFunc(parent, PARENT); ++LockFunc(parent2, PARENT2); ++LockFunc(child, CHILD); ++LockFunc(child2, CHILD2); ++LockFunc(whplink, CHILD2); /* sharing lock-subclass */ ++ ++#undef LockFunc ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* tiny test for inode number */ ++/* tmpfs generation is too rough */ ++static inline int au_test_higen(struct inode *inode, struct inode *h_inode) ++{ ++ //IiMustAnyLock(inode); ++ return !(itoii(inode)->ii_hsb1 == h_inode->i_sb ++ && inode->i_generation == h_inode->i_generation); ++} ++ ++static inline int au_iigen(struct inode *inode) ++{ ++ return atomic_read(&itoii(inode)->ii_generation); ++} ++ ++#ifdef CONFIG_AUFS_HINOTIFY ++static inline void au_iigen_dec(struct inode *inode) ++{ ++ //Dbg("i%lu\n", inode->i_ino); ++ atomic_dec(&itoii(inode)->ii_generation); ++} ++ ++/* hinotify.c */ ++int alloc_hinotify(struct aufs_hinode *hinode, struct inode *inode, ++ struct inode *h_inode); ++void do_free_hinotify(struct aufs_hinode *hinode); ++void do_hdir_lock(struct inode *h_dir, struct inode *dir, aufs_bindex_t bindex, ++ unsigned int lsc); ++void hdir_unlock(struct inode *h_dir, struct inode *dir, aufs_bindex_t bindex); ++void hdir_lock_rename(struct dentry **h_parents, struct inode **dirs, ++ aufs_bindex_t bindex, int issamedir); ++void hdir_unlock_rename(struct dentry **h_parents, struct inode **dirs, ++ aufs_bindex_t bindex, int issamedir); ++void au_reset_hinotify(struct inode *inode, unsigned int flags); ++int __init au_inotify_init(void); ++void au_inotify_fin(void); ++#else ++static inline ++int alloc_hinotify(struct aufs_hinode *hinode, struct inode *inode, ++ struct inode *h_inode) ++{ ++ return -EOPNOTSUPP; ++} ++ ++static inline void do_free_hinotify(struct aufs_hinode *hinode) ++{ ++ /* nothing */ ++} ++ ++static inline ++void do_hdir_lock(struct inode *h_dir, struct inode *dir, aufs_bindex_t 
bindex, ++ unsigned int lsc) ++{ ++ hi_lock(h_dir, lsc); ++} ++ ++static inline ++void hdir_unlock(struct inode *h_dir, struct inode *dir, aufs_bindex_t bindex) ++{ ++ i_unlock(h_dir); ++} ++ ++static inline ++void hdir_lock_rename(struct dentry **h_parents, struct inode **dirs, ++ aufs_bindex_t bindex, int issamedir) ++{ ++ vfsub_lock_rename(h_parents[0], h_parents[1]); ++} ++ ++static inline ++void hdir_unlock_rename(struct dentry **h_parents, struct inode **dirs, ++ aufs_bindex_t bindex, int issamedir) ++{ ++ vfsub_unlock_rename(h_parents[0], h_parents[1]); ++} ++ ++static inline void au_reset_hinotify(struct inode *inode, unsigned int flags) ++{ ++ /* nothing */ ++} ++ ++#define au_inotify_init() 0 ++#define au_inotify_fin() /* */ ++#endif /* CONFIG_AUFS_HINOTIFY */ ++ ++static inline void free_hinotify(struct inode *inode, aufs_bindex_t bindex) ++{ ++ do_free_hinotify(itoii(inode)->ii_hinode + bindex); ++} ++ ++/* ++ * hgdir_lock, hdir_lock, hdir2_lock ++ */ ++#define LockFunc(name, lsc) \ ++static inline \ ++void name##_lock(struct inode *h_dir, struct inode *dir, aufs_bindex_t bindex) \ ++{do_hdir_lock(h_dir, dir, bindex, AuLsc_HI_##lsc);} ++ ++LockFunc(hgdir, GPARENT); ++LockFunc(hdir, PARENT); ++LockFunc(hdir2, PARENT2); ++ ++#undef LockFunc ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* lock subclass for iinfo */ ++enum { ++ AuLsc_II_CHILD, /* child first */ ++ AuLsc_II_CHILD2, /* rename(2), link(2), and cpup at hinotify */ ++ AuLsc_II_CHILD3, /* copyup dirs */ ++ AuLsc_II_PARENT, ++ AuLsc_II_PARENT2, ++ AuLsc_II_PARENT3, ++ AuLsc_II_NEW /* new inode */ ++}; ++ ++/* ++ * ii_read_lock_child, ii_write_lock_child, ++ * ii_read_lock_child2, ii_write_lock_child2, ++ * ii_read_lock_child3, ii_write_lock_child3, ++ * ii_read_lock_parent, ii_write_lock_parent, ++ * ii_read_lock_parent2, ii_write_lock_parent2, ++ * ii_read_lock_parent3, ii_write_lock_parent3, ++ * ii_read_lock_new, ii_write_lock_new ++ */ ++#define ReadLockFunc(name, lsc) \ ++static inline void ii_read_lock_##name(struct inode *i) \ ++{rw_read_lock_nested(&itoii(i)->ii_rwsem, AuLsc_II_##lsc);} ++ ++#define WriteLockFunc(name, lsc) \ ++static inline void ii_write_lock_##name(struct inode *i) \ ++{rw_write_lock_nested(&itoii(i)->ii_rwsem, AuLsc_II_##lsc);} ++ ++#define RWLockFuncs(name, lsc) \ ++ ReadLockFunc(name, lsc); \ ++ WriteLockFunc(name, lsc) ++ ++RWLockFuncs(child, CHILD); ++RWLockFuncs(child2, CHILD2); ++RWLockFuncs(child3, CHILD3); ++RWLockFuncs(parent, PARENT); ++RWLockFuncs(parent2, PARENT2); ++RWLockFuncs(parent3, PARENT3); ++RWLockFuncs(new, NEW); ++ ++#undef ReadLockFunc ++#undef WriteLockFunc ++#undef RWLockFunc ++ ++/* ++ * ii_read_unlock, ii_write_unlock, ii_downgrade_lock ++ */ ++SimpleUnlockRwsemFuncs(ii, struct inode *i, itoii(i)->ii_rwsem); ++ ++/* to debug easier, do not make them inlined functions */ ++#define IiMustReadLock(i) do { \ ++ SiMustAnyLock((i)->i_sb); \ ++ RwMustReadLock(&itoii(i)->ii_rwsem); \ ++} while (0) ++ ++#define IiMustWriteLock(i) do { \ ++ SiMustAnyLock((i)->i_sb); \ ++ RwMustWriteLock(&itoii(i)->ii_rwsem); \ ++} while (0) ++ ++#define IiMustAnyLock(i) do { \ ++ SiMustAnyLock((i)->i_sb); \ ++ RwMustAnyLock(&itoii(i)->ii_rwsem); \ ++} while (0) ++ ++#define IiMustNoWaiters(i) RwMustNoWaiters(&itoii(i)->ii_rwsem) ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_INODE_H__ */ +diff --git a/fs/aufs/misc.c b/fs/aufs/misc.c +new file mode 100755 +index 0000000..32e0549 +--- /dev/null ++++ b/fs/aufs/misc.c +@@ -0,0 +1,228 @@ ++/* ++ * Copyright 
(C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: misc.c,v 1.31 2007/05/14 03:41:52 sfjro Exp $ */ ++ ++//#include <linux/fs.h> ++//#include <linux/namei.h> ++//#include <linux/mm.h> ++//#include <asm/uaccess.h> ++#include "aufs.h" ++ ++void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp) ++{ ++ void *q; ++ ++ LKTRTrace("p %p, nused %d, sz %d, ksize %d\n", ++ p, nused, new_sz, ksize(p)); ++ DEBUG_ON(new_sz <= 0); ++ if (new_sz <= nused) ++ return p; ++ if (new_sz <= ksize(p)) { ++ memset(p + nused, 0, new_sz - nused); ++ return p; ++ } ++ ++ q = kmalloc(new_sz, gfp); ++ //q = NULL; ++ if (unlikely(!q)) ++ return NULL; ++ memcpy(q, p, nused); ++ memset(q + nused, 0, new_sz - nused); ++ //smp_mb(); ++ kfree(p); ++ return q; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++// todo: make it inline ++struct nameidata *fake_dm(struct nameidata *fake_nd, struct nameidata *nd, ++ struct super_block *sb, aufs_bindex_t bindex) ++{ ++ LKTRTrace("nd %p, b%d\n", nd, bindex); ++ ++ if (!nd) ++ return NULL; ++ ++ fake_nd->dentry = NULL; ++ fake_nd->mnt = NULL; ++ ++#ifndef CONFIG_AUFS_FAKE_DM ++ DiMustAnyLock(nd->dentry); ++ ++ if (bindex <= dbend(nd->dentry)) ++ fake_nd->dentry = au_h_dptr_i(nd->dentry, bindex); ++ if (fake_nd->dentry) { ++ dget(fake_nd->dentry); ++ fake_nd->mnt = sbr_mnt(sb, bindex); ++ DEBUG_ON(!fake_nd->mnt); ++ mntget(fake_nd->mnt); ++ } else ++ fake_nd = ERR_PTR(-ENOENT); ++#endif ++ ++ TraceErrPtr(fake_nd); ++ return fake_nd; ++} ++ ++void fake_dm_release(struct nameidata *fake_nd) ++{ ++#ifndef CONFIG_AUFS_FAKE_DM ++ if (fake_nd) { ++ mntput(fake_nd->mnt); ++ dput(fake_nd->dentry); ++ } ++#endif ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int au_copy_file(struct file *dst, struct file *src, loff_t len, ++ struct super_block *sb, int *sparse) ++{ ++ int err, all_zero, dlgt; ++ unsigned long blksize; ++ char *buf; ++ /* reduce stack space */ ++ struct iattr *ia; ++ ++ LKTRTrace("%.*s, %.*s\n", ++ DLNPair(dst->f_dentry), DLNPair(src->f_dentry)); ++ DEBUG_ON(!(dst->f_mode & FMODE_WRITE)); ++ IMustLock(dst->f_dentry->d_parent->d_inode); ++ ++ err = -ENOMEM; ++ blksize = dst->f_dentry->d_sb->s_blocksize; ++ if (!blksize || PAGE_SIZE < blksize) ++ blksize = PAGE_SIZE; ++ LKTRTrace("blksize %lu\n", blksize); ++ buf = kmalloc(blksize, GFP_KERNEL); ++ //buf = NULL; ++ if (unlikely(!buf)) ++ goto out; ++ ia = kmalloc(sizeof(*ia), GFP_KERNEL); ++ if (unlikely(!ia)) ++ goto out_buf; ++ ++ dlgt = need_dlgt(sb); ++ err = all_zero = 0; ++ dst->f_pos = src->f_pos = 0; ++ while (len) { ++ size_t sz, rbytes, wbytes, i; ++ char *p; ++ ++ LKTRTrace("len %lld\n", len); ++ sz = blksize; ++ if (len < blksize) ++ sz = len; ++ ++ /* support LSM and 
notify */ ++ rbytes = 0; ++ while (!rbytes || err == -EAGAIN || err == -EINTR) ++ err = rbytes = vfsub_read_k(src, buf, sz, &src->f_pos, ++ dlgt); ++ if (unlikely(err < 0)) ++ break; ++ ++ all_zero = 0; ++ if (len >= rbytes && rbytes == blksize) { ++ all_zero = 1; ++ p = buf; ++ for (i = 0; all_zero && i < rbytes; i++) ++ all_zero = !*p++; ++ } ++ if (!all_zero) { ++ wbytes = rbytes; ++ p = buf; ++ while (wbytes) { ++ size_t b; ++ /* support LSM and notify */ ++ err = b = vfsub_write_k(dst, p, wbytes, ++ &dst->f_pos, dlgt); ++ if (unlikely(err == -EAGAIN || err == -EINTR)) ++ continue; ++ if (unlikely(err < 0)) ++ break; ++ wbytes -= b; ++ p += b; ++ } ++ } else { ++ loff_t res; ++ LKTRLabel(hole); ++ *sparse = 1; ++ err = res = vfsub_llseek(dst, rbytes, SEEK_CUR); ++ if (unlikely(res < 0)) ++ break; ++ } ++ len -= rbytes; ++ err = 0; ++ } ++ ++ /* the last block may be a hole */ ++ if (unlikely(!err && all_zero)) { ++ struct dentry *h_d = dst->f_dentry; ++ struct inode *h_i = h_d->d_inode; ++ ++ LKTRLabel(last hole); ++ do { ++ err = vfsub_write_k(dst, "\0", 1, &dst->f_pos, dlgt); ++ } while (err == -EAGAIN || err == -EINTR); ++ if (err == 1) { ++ ia->ia_size = dst->f_pos; ++ ia->ia_valid = ATTR_SIZE | ATTR_FILE; ++ ia->ia_file = dst; ++ hi_lock_child2(h_i); ++ err = vfsub_notify_change(h_d, ia, dlgt); ++ i_unlock(h_i); ++ } ++ } ++ ++ kfree(ia); ++ out_buf: ++ kfree(buf); ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int test_ro(struct super_block *sb, aufs_bindex_t bindex, struct inode *inode) ++{ ++ int err; ++ ++ err = br_rdonly(stobr(sb, bindex)); ++ if (!err && inode) { ++ struct inode *hi = au_h_iptr_i(inode, bindex); ++ if (hi) ++ err = IS_IMMUTABLE(hi) ? -EROFS : 0; ++ } ++ return err; ++} ++ ++int au_test_perm(struct inode *hidden_inode, int mask, int dlgt) ++{ ++ if (!current->fsuid) ++ return 0; ++ if (unlikely(au_is_nfs(hidden_inode->i_sb) ++ && (mask & MAY_WRITE) ++ && S_ISDIR(hidden_inode->i_mode))) ++ mask |= MAY_READ; /* force permission check */ ++ return vfsub_permission(hidden_inode, mask, NULL, dlgt); ++} +diff --git a/fs/aufs/misc.h b/fs/aufs/misc.h +new file mode 100755 +index 0000000..fea4a2c +--- /dev/null ++++ b/fs/aufs/misc.h +@@ -0,0 +1,187 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: misc.h,v 1.25 2007/05/14 03:41:52 sfjro Exp $ */ ++ ++#ifndef __AUFS_MISC_H__ ++#define __AUFS_MISC_H__ ++ ++#ifdef __KERNEL__ ++ ++#include <linux/fs.h> ++#include <linux/namei.h> ++#include <linux/sched.h> ++#include <linux/version.h> ++#include <linux/aufs_type.h> ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) ++#define I_MUTEX_QUOTA 0 ++#define lockdep_off() /* */ ++#define lockdep_on() /* */ ++#define mutex_lock_nested(mtx, lsc) mutex_lock(mtx) ++#define down_write_nested(rw, lsc) down_write(rw) ++#define down_read_nested(rw, lsc) down_read(rw) ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct aufs_rwsem { ++ struct rw_semaphore rwsem; ++#ifdef CONFIG_AUFS_DEBUG ++ atomic_t rcnt; ++#endif ++}; ++ ++#ifdef CONFIG_AUFS_DEBUG ++#define DbgRcntInit(rw) atomic_set(&(rw)->rcnt, 0) ++#define DbgRcntInc(rw) atomic_inc(&(rw)->rcnt) ++#define DbgRcntDec(rw) WARN_ON(atomic_dec_return(&(rw)->rcnt) < 0) ++#else ++#define DbgRcntInit(rw) /* */ ++#define DbgRcntInc(rw) /* */ ++#define DbgRcntDec(rw) /* */ ++#endif ++ ++static inline void rw_init_nolock(struct aufs_rwsem *rw) ++{ ++ DbgRcntInit(rw); ++ init_rwsem(&rw->rwsem); ++} ++ ++static inline void rw_init_wlock(struct aufs_rwsem *rw) ++{ ++ rw_init_nolock(rw); ++ down_write(&rw->rwsem); ++} ++ ++static inline void rw_init_wlock_nested(struct aufs_rwsem *rw, unsigned int lsc) ++{ ++ rw_init_nolock(rw); ++ down_write_nested(&rw->rwsem, lsc); ++} ++ ++static inline void rw_read_lock(struct aufs_rwsem *rw) ++{ ++ down_read(&rw->rwsem); ++ DbgRcntInc(rw); ++} ++ ++static inline void rw_read_lock_nested(struct aufs_rwsem *rw, unsigned int lsc) ++{ ++ down_read_nested(&rw->rwsem, lsc); ++ DbgRcntInc(rw); ++} ++ ++static inline void rw_read_unlock(struct aufs_rwsem *rw) ++{ ++ DbgRcntDec(rw); ++ up_read(&rw->rwsem); ++} ++ ++static inline void rw_dgrade_lock(struct aufs_rwsem *rw) ++{ ++ DbgRcntInc(rw); ++ downgrade_write(&rw->rwsem); ++} ++ ++static inline void rw_write_lock(struct aufs_rwsem *rw) ++{ ++ down_write(&rw->rwsem); ++} ++ ++static inline void rw_write_lock_nested(struct aufs_rwsem *rw, unsigned int lsc) ++{ ++ down_write_nested(&rw->rwsem, lsc); ++} ++ ++static inline void rw_write_unlock(struct aufs_rwsem *rw) ++{ ++ up_write(&rw->rwsem); ++} ++ ++#if 0 // why is not _nested version defined ++static inline int rw_read_trylock(struct aufs_rwsem *rw) ++{ ++ int ret = down_read_trylock(&rw->rwsem); ++ if (ret) ++ DbgRcntInc(rw); ++ return ret; ++} ++ ++static inline int rw_write_trylock(struct aufs_rwsem *rw) ++{ ++ return down_write_trylock(&rw->rwsem); ++} ++#endif ++ ++#undef DbgRcntInit ++#undef DbgRcntInc ++#undef DbgRcntDec ++ ++/* to debug easier, do not make them inlined functions */ ++#define RwMustNoWaiters(rw) DEBUG_ON(!list_empty(&(rw)->rwsem.wait_list)) ++#define RwMustAnyLock(rw) DEBUG_ON(down_write_trylock(&(rw)->rwsem)) ++#ifdef CONFIG_AUFS_DEBUG ++#define RwMustReadLock(rw) do { \ ++ RwMustAnyLock(rw); \ ++ DEBUG_ON(!atomic_read(&(rw)->rcnt)); \ ++} while (0) ++#define RwMustWriteLock(rw) do { \ ++ RwMustAnyLock(rw); \ ++ DEBUG_ON(atomic_read(&(rw)->rcnt)); \ ++} while (0) ++#else ++#define RwMustReadLock(rw) RwMustAnyLock(rw) ++#define RwMustWriteLock(rw) RwMustAnyLock(rw) ++#endif ++ ++#define SimpleLockRwsemFuncs(prefix, param, 
rwsem) \ ++static inline void prefix##_read_lock(param) {rw_read_lock(&(rwsem));} \ ++static inline void prefix##_write_lock(param) {rw_write_lock(&(rwsem));} ++//static inline void prefix##_read_trylock(param) {rw_read_trylock(&(rwsem));} ++//static inline void prefix##_write_trylock(param) {rw_write_trylock(&(rwsem));} ++//static inline void prefix##_read_trylock_nested(param, lsc) ++//{rw_read_trylock_nested(&(rwsem, lsc));} ++//static inline void prefix##_write_trylock_nestd(param, lsc) ++//{rw_write_trylock_nested(&(rwsem), nested);} ++ ++#define SimpleUnlockRwsemFuncs(prefix, param, rwsem) \ ++static inline void prefix##_read_unlock(param) {rw_read_unlock(&(rwsem));} \ ++static inline void prefix##_write_unlock(param) {rw_write_unlock(&(rwsem));} \ ++static inline void prefix##_downgrade_lock(param) {rw_dgrade_lock(&(rwsem));} ++ ++#define SimpleRwsemFuncs(prefix, param, rwsem) \ ++ SimpleLockRwsemFuncs(prefix, param, rwsem); \ ++ SimpleUnlockRwsemFuncs(prefix, param, rwsem) ++ ++/* ---------------------------------------------------------------------- */ ++ ++typedef ssize_t (*readf_t)(struct file*, char __user*, size_t, loff_t*); ++typedef ssize_t (*writef_t)(struct file*, const char __user*, size_t, loff_t*); ++ ++void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp); ++struct nameidata *fake_dm(struct nameidata *fake_nd, struct nameidata *nd, ++ struct super_block *sb, aufs_bindex_t bindex); ++void fake_dm_release(struct nameidata *fake_nd); ++int au_copy_file(struct file *dst, struct file *src, loff_t len, ++ struct super_block *sb, int *sparse); ++int test_ro(struct super_block *sb, aufs_bindex_t bindex, struct inode *inode); ++int au_test_perm(struct inode *h_inode, int mask, int dlgt); ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_MISC_H__ */ +diff --git a/fs/aufs/module.c b/fs/aufs/module.c +new file mode 100755 +index 0000000..06c563e +--- /dev/null ++++ b/fs/aufs/module.c +@@ -0,0 +1,334 @@ ++/* ++ * Copyright (C) 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
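misc.h above stamps out per-object lock helpers with the SimpleRwsemFuncs token-pasting macros instead of hand-writing every read/write wrapper around the embedded rw_semaphore. The idiom itself, shown outside the kernel with a pthread rwlock standing in for struct rw_semaphore (all of the names below are invented for this illustration):

#include <pthread.h>

struct item {
    pthread_rwlock_t lock; /* must be pthread_rwlock_init()ed by the owner */
    int value;
};

/* generates item_read_lock(), item_read_unlock(), item_write_lock(), ... */
#define SIMPLE_RWLOCK_FUNCS(prefix, type, member)        \
static inline void prefix##_read_lock(type *p)           \
{ pthread_rwlock_rdlock(&p->member); }                   \
static inline void prefix##_read_unlock(type *p)         \
{ pthread_rwlock_unlock(&p->member); }                   \
static inline void prefix##_write_lock(type *p)          \
{ pthread_rwlock_wrlock(&p->member); }                   \
static inline void prefix##_write_unlock(type *p)        \
{ pthread_rwlock_unlock(&p->member); }

SIMPLE_RWLOCK_FUNCS(item, struct item, lock)

static int read_value(struct item *it)
{
    int v;

    item_read_lock(it);
    v = it->value;
    item_read_unlock(it);
    return v;
}

The aufs version additionally keeps a reader count when CONFIG_AUFS_DEBUG is set, so that RwMustReadLock/RwMustWriteLock can assert which mode the semaphore is held in.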
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: module.c,v 1.9 2007/04/30 05:46:32 sfjro Exp $ */ ++ ++//#include <linux/init.h> ++//#include <linux/kobject.h> ++#include <linux/module.h> ++//#include <linux/seq_file.h> ++//#include <linux/sysfs.h> ++#include "aufs.h" ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * aufs caches ++ */ ++struct kmem_cache *aufs_cachep[AuCache_Last]; ++static int __init create_cache(void) ++{ ++#define Cache(type) kmem_cache_create(#type, sizeof(struct type), 0, \ ++ SLAB_RECLAIM_ACCOUNT, NULL, NULL) ++ ++ if ((aufs_cachep[AuCache_DINFO] = Cache(aufs_dinfo)) ++ && (aufs_cachep[AuCache_ICNTNR] = Cache(aufs_icntnr)) ++ && (aufs_cachep[AuCache_FINFO] = Cache(aufs_finfo)) ++ //&& (aufs_cachep[AuCache_FINFO] = NULL) ++ && (aufs_cachep[AuCache_VDIR] = Cache(aufs_vdir)) ++ && (aufs_cachep[AuCache_DEHSTR] = Cache(aufs_dehstr)) ++ && (aufs_cachep[AuCache_HINOTIFY] = Cache(aufs_hinotify))) ++ return 0; ++ return -ENOMEM; ++ ++#undef Cache ++} ++ ++static void destroy_cache(void) ++{ ++ int i; ++ for (i = 0; i < AuCache_Last; i++) ++ if (aufs_cachep[i]) ++ kmem_cache_destroy(aufs_cachep[i]); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++char au_esc_chars[0x20 + 3]; /* 0x01-0x20, backslash, del, and NULL */ ++int au_dir_roflags; ++extern struct file_system_type aufs_fs_type; ++ ++#ifdef DbgDlgt ++#include <linux/security.h> ++#include "dbg_dlgt.c" ++#else ++#define dbg_dlgt_init() 0 ++#define dbg_dlgt_fin() /* */ ++#endif ++ ++/* ++ * functions for module interface. 
++ */ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Junjiro Okajima"); ++MODULE_DESCRIPTION(AUFS_NAME " -- Another unionfs"); ++MODULE_VERSION(AUFS_VERSION); ++ ++/* it should be 'byte', but param_set_byte() prints by "%c" */ ++short aufs_nwkq = AUFS_NWKQ_DEF; ++MODULE_PARM_DESC(nwkq, "the number of workqueue thread, " AUFS_WKQ_NAME); ++module_param_named(nwkq, aufs_nwkq, short, 0444); ++ ++int sysaufs_brs = 0; ++MODULE_PARM_DESC(brs, "use <sysfs>/fs/aufs/brs"); ++module_param_named(brs, sysaufs_brs, int, 0444); ++ ++static int __init aufs_init(void) ++{ ++ int err, i; ++ char *p; ++ ++ //sbinfo->si_xino is atomic_long_t ++ BUILD_BUG_ON(sizeof(ino_t) != sizeof(long)); ++ ++#ifdef CONFIG_AUFS_DEBUG ++ { ++ struct aufs_destr destr; ++ destr.len = -1; ++ DEBUG_ON(destr.len < NAME_MAX); ++ } ++ ++#ifdef CONFIG_4KSTACKS ++ printk("CONFIG_4KSTACKS is defined.\n"); ++#endif ++#if 0 // verbose debug ++ { ++ union { ++ struct aufs_branch *br; ++ struct aufs_dinfo *di; ++ struct aufs_finfo *fi; ++ struct aufs_iinfo *ii; ++ struct aufs_hinode *hi; ++ struct aufs_sbinfo *si; ++ struct aufs_destr *destr; ++ struct aufs_de *de; ++ struct aufs_wh *wh; ++ struct aufs_vdir *vd; ++ } u; ++ ++ printk("br{" ++ "xino %d, readf %d, writef %d, " ++ "id %d, perm %d, mnt %d, count %d, " ++ "wh_sem %d, wh %d, run %d} %d\n", ++ offsetof(typeof(*u.br), br_xino), ++ offsetof(typeof(*u.br), br_xino_read), ++ offsetof(typeof(*u.br), br_xino_write), ++ offsetof(typeof(*u.br), br_id), ++ offsetof(typeof(*u.br), br_perm), ++ offsetof(typeof(*u.br), br_mnt), ++ offsetof(typeof(*u.br), br_count), ++ offsetof(typeof(*u.br), br_wh_rwsem), ++ offsetof(typeof(*u.br), br_wh), ++ offsetof(typeof(*u.br), br_wh_running), ++ sizeof(*u.br)); ++ printk("di{gen %d, rwsem %d, bstart %d, bend %d, bwh %d, " ++ "bdiropq %d, hdentry %d, reval %d} %d\n", ++ offsetof(typeof(*u.di), di_generation), ++ offsetof(typeof(*u.di), di_rwsem), ++ offsetof(typeof(*u.di), di_bstart), ++ offsetof(typeof(*u.di), di_bend), ++ offsetof(typeof(*u.di), di_bwh), ++ offsetof(typeof(*u.di), di_bdiropq), ++ offsetof(typeof(*u.di), di_hdentry), ++ offsetof(typeof(*u.di), di_reval), ++ sizeof(*u.di)); ++ printk("fi{gen %d, rwsem %d, hfile %d, bstart %d, bend %d, " ++ "h_vm_ops %d, vdir_cach %d} %d\n", ++ offsetof(typeof(*u.fi), fi_generation), ++ offsetof(typeof(*u.fi), fi_rwsem), ++ offsetof(typeof(*u.fi), fi_hfile), ++ offsetof(typeof(*u.fi), fi_bstart), ++ offsetof(typeof(*u.fi), fi_bend), ++ offsetof(typeof(*u.fi), fi_h_vm_ops), ++ offsetof(typeof(*u.fi), fi_vdir_cache), ++ sizeof(*u.fi)); ++ printk("ii{rwsem %d, bstart %d, bend %d, hinode %d, vdir %d} " ++ "%d\n", ++ offsetof(typeof(*u.ii), ii_rwsem), ++ offsetof(typeof(*u.ii), ii_bstart), ++ offsetof(typeof(*u.ii), ii_bend), ++ offsetof(typeof(*u.ii), ii_hinode), ++ offsetof(typeof(*u.ii), ii_vdir), ++ sizeof(*u.ii)); ++ printk("hi{inode %d, id %d, notify %d} %d\n", ++ offsetof(typeof(*u.hi), hi_inode), ++ offsetof(typeof(*u.hi), hi_id), ++ offsetof(typeof(*u.hi), hi_notify), ++ sizeof(*u.hi)); ++ printk("si{rwsem %d, gen %d, " ++ "failed_refresh %d, " ++ "bend %d, last id %d, br %d, " ++ "flags %d, " ++ "xino %d, " ++ "rdcache %d, " ++ "dirwh %d, " ++ "pl_lock %d, pl %d, " ++ "kobj %d} %d\n", ++ offsetof(typeof(*u.si), si_rwsem), ++ offsetof(typeof(*u.si), si_generation), ++ -1,//offsetof(typeof(*u.si), si_failed_refresh_dirs), ++ offsetof(typeof(*u.si), si_bend), ++ offsetof(typeof(*u.si), si_last_br_id), ++ offsetof(typeof(*u.si), si_branch), ++ offsetof(typeof(*u.si), si_flags), ++ offsetof(typeof(*u.si), 
si_xino), ++ offsetof(typeof(*u.si), si_rdcache), ++ offsetof(typeof(*u.si), si_dirwh), ++ offsetof(typeof(*u.si), si_plink_lock), ++ offsetof(typeof(*u.si), si_plink), ++ offsetof(typeof(*u.si), si_kobj), ++ sizeof(*u.si)); ++ printk("destr{len %d, name %d} %d\n", ++ offsetof(typeof(*u.destr), len), ++ offsetof(typeof(*u.destr), name), ++ sizeof(*u.destr)); ++ printk("de{ino %d, type %d, str %d} %d\n", ++ offsetof(typeof(*u.de), de_ino), ++ offsetof(typeof(*u.de), de_type), ++ offsetof(typeof(*u.de), de_str), ++ sizeof(*u.de)); ++ printk("wh{hash %d, bindex %d, str %d} %d\n", ++ offsetof(typeof(*u.wh), wh_hash), ++ offsetof(typeof(*u.wh), wh_bindex), ++ offsetof(typeof(*u.wh), wh_str), ++ sizeof(*u.wh)); ++ printk("vd{deblk %d, nblk %d, last %d, ver %d, jiffy %d} %d\n", ++ offsetof(typeof(*u.vd), vd_deblk), ++ offsetof(typeof(*u.vd), vd_nblk), ++ offsetof(typeof(*u.vd), vd_last), ++ offsetof(typeof(*u.vd), vd_version), ++ offsetof(typeof(*u.vd), vd_jiffy), ++ sizeof(*u.vd)); ++ } ++#endif ++#endif ++ ++ p = au_esc_chars; ++ for (i = 1; i <= ' '; i++) ++ *p++ = i; ++ *p++ = '\\'; ++ *p++ = '\x7f'; ++ *p = 0; ++ ++ au_dir_roflags = au_file_roflags(O_DIRECTORY | O_LARGEFILE); ++#ifndef CONFIG_AUFS_SYSAUFS ++ sysaufs_brs = 0; ++#endif ++ ++ err = -EINVAL; ++ if (unlikely(aufs_nwkq <= 0)) ++ goto out; ++ err = create_cache(); ++ if (unlikely(err)) ++ goto out; ++ err = sysaufs_init(); ++ if (unlikely(err)) ++ goto out_cache; ++ err = au_wkq_init(); ++ if (unlikely(err)) ++ goto out_kobj; ++ err = au_inotify_init(); ++ if (unlikely(err)) ++ goto out_wkq; ++ err = dbg_dlgt_init(); ++ if (unlikely(err)) ++ goto out_inotify; ++ err = register_filesystem(&aufs_fs_type); ++ if (unlikely(err)) ++ goto out_dlgt; ++ printk(AUFS_NAME " " AUFS_VERSION "\n"); ++ return 0; /* success */ ++ ++ out_dlgt: ++ dbg_dlgt_fin(); ++ out_inotify: ++ au_inotify_fin(); ++ out_wkq: ++ au_wkq_fin(); ++ out_kobj: ++ sysaufs_fin(); ++ out_cache: ++ destroy_cache(); ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++static void __exit aufs_exit(void) ++{ ++ unregister_filesystem(&aufs_fs_type); ++ dbg_dlgt_fin(); ++ au_inotify_fin(); ++ au_wkq_fin(); ++ sysaufs_fin(); ++ destroy_cache(); ++} ++ ++module_init(aufs_init); ++module_exit(aufs_exit); ++ ++/* ---------------------------------------------------------------------- */ ++ ++// fake Kconfig ++#if 1 ++#ifdef CONFIG_AUFS_HINOTIFY ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) ++#error CONFIG_AUFS_HINOTIFY is supported in linux-2.6.18 and later. ++#endif ++#ifndef CONFIG_INOTIFY ++#error enable CONFIG_INOTIFY to use CONFIG_AUFS_HINOTIFY. ++#endif ++#endif ++ ++#if AUFS_BRANCH_MAX > 511 && BITS_PER_LONG == 64 && PAGE_SIZE == 4096 ++#warning For 4k pagesize and 64bit environment, \ ++ CONFIG_AUFS_BRANCH_MAX_511 or smaller is recommended. ++#endif ++ ++#ifdef CONFIG_AUFS_SYSAUFS ++#ifndef CONFIG_SYSFS ++#error CONFIG_AUFS_SYSAUFS requires CONFIG_SYSFS. ++#endif ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) ++#error CONFIG_AUFS_SYSAUFS requires linux-2.6.18 and later. ++#endif ++#endif ++ ++#ifdef CONFIG_AUFS_EXPORT ++#if !defined(CONFIG_EXPORTFS) && !defined(CONFIG_EXPORTFS_MODULE) ++#error CONFIG_AUFS_EXPORT requires CONFIG_EXPORTFS ++#endif ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) ++#error CONFIG_AUFS_EXPORT requires linux-2.6.18 and later. 
++#endif ++#if defined(CONFIG_EXPORTFS_MODULE) && defined(CONFIG_AUFS) ++#error need CONFIG_EXPORTFS=y to link aufs statically with CONFIG_AUFS_EXPORT ++#endif ++#endif ++ ++#ifdef CONFIG_DEBUG_PROVE_LOCKING ++#if MAX_LOCKDEP_SUBCLASSES < AuLsc_End ++#warning lockdep will not work since aufs uses deeper locks. ++#endif ++#endif ++ ++#ifdef CONFIG_AUFS_COMPAT ++#warning CONFIG_AUFS_COMPAT will be removed in the near future. ++#endif ++ ++#endif +diff --git a/fs/aufs/module.h b/fs/aufs/module.h +new file mode 100755 +index 0000000..3769861 +--- /dev/null ++++ b/fs/aufs/module.h +@@ -0,0 +1,60 @@ ++/* ++ * Copyright (C) 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: module.h,v 1.8 2007/05/14 03:41:52 sfjro Exp $ */ ++ ++#ifndef __AUFS_MODULE_H__ ++#define __AUFS_MODULE_H__ ++ ++#ifdef __KERNEL__ ++ ++#include <linux/slab.h> ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* module parameters */ ++extern short aufs_nwkq; ++extern int sysaufs_brs; ++ ++/* ---------------------------------------------------------------------- */ ++ ++extern char au_esc_chars[]; ++extern int au_dir_roflags; ++ ++/* kmem cache */ ++enum {AuCache_DINFO, AuCache_ICNTNR, AuCache_FINFO, AuCache_VDIR, ++ AuCache_DEHSTR, AuCache_HINOTIFY, AuCache_Last}; ++extern struct kmem_cache *aufs_cachep[]; ++ ++#define CacheFuncs(name, index) \ ++static inline void *cache_alloc_##name(void) \ ++{return kmem_cache_alloc(aufs_cachep[index], GFP_KERNEL);} \ ++static inline void cache_free_##name(void *p) \ ++{kmem_cache_free(aufs_cachep[index], p);} ++ ++CacheFuncs(dinfo, AuCache_DINFO); ++CacheFuncs(icntnr, AuCache_ICNTNR); ++CacheFuncs(finfo, AuCache_FINFO); ++CacheFuncs(vdir, AuCache_VDIR); ++CacheFuncs(dehstr, AuCache_DEHSTR); ++CacheFuncs(hinotify, AuCache_HINOTIFY); ++ ++#undef CacheFuncs ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_MODULE_H__ */ +diff --git a/fs/aufs/opts.c b/fs/aufs/opts.c +new file mode 100755 +index 0000000..c1a9445 +--- /dev/null ++++ b/fs/aufs/opts.c +@@ -0,0 +1,1043 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
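aufs_init in module.c above brings its pieces up in a fixed order (caches, sysaufs, workqueue, inotify, the dlgt debug hook, filesystem registration) and, when one step fails, tears down only what has already been set up by falling through a chain of goto labels in reverse order. A stripped-down sketch of that unwinding shape with placeholder subsystems (none of these functions are the real aufs ones):

#include <errno.h>

/* trivial stand-ins so the sketch is self-contained */
static int cache_init(void) { return 0; }
static void cache_fin(void) { }
static int wkq_init(void) { return 0; }
static void wkq_fin(void) { }
static int fs_register(void) { return -ENOMEM; } /* pretend the last step fails */

static int example_init(void)
{
    int err;

    err = cache_init();
    if (err)
        goto out;
    err = wkq_init();
    if (err)
        goto out_cache;
    err = fs_register();
    if (err)
        goto out_wkq;
    return 0; /* success: everything stays initialised */

 out_wkq:
    wkq_fin(); /* undo in reverse order of initialisation */
 out_cache:
    cache_fin();
 out:
    return err;
}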
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: opts.c,v 1.34 2007/05/14 03:40:27 sfjro Exp $ */ ++ ++#include <asm/types.h> // a distribution requires ++#include <linux/parser.h> ++#include "aufs.h" ++ ++enum { ++ Opt_br, ++ Opt_add, Opt_del, Opt_mod, Opt_append, Opt_prepend, ++ Opt_idel, Opt_imod, ++ Opt_dirwh, Opt_rdcache, Opt_deblk, Opt_nhash, ++ Opt_xino, Opt_zxino, Opt_noxino, ++ Opt_plink, Opt_noplink, Opt_list_plink, Opt_clean_plink, ++ Opt_udba, ++ Opt_diropq_a, Opt_diropq_w, ++ Opt_warn_perm, Opt_nowarn_perm, ++ Opt_findrw_dir, Opt_findrw_br, ++ Opt_coo, ++ Opt_dlgt, Opt_nodlgt, ++ Opt_tail, Opt_ignore, Opt_err ++}; ++ ++static match_table_t options = { ++ {Opt_br, "br=%s"}, ++ {Opt_br, "br:%s"}, ++ ++ {Opt_add, "add=%d:%s"}, ++ {Opt_add, "add:%d:%s"}, ++ {Opt_add, "ins=%d:%s"}, ++ {Opt_add, "ins:%d:%s"}, ++ {Opt_append, "append=%s"}, ++ {Opt_append, "append:%s"}, ++ {Opt_prepend, "prepend=%s"}, ++ {Opt_prepend, "prepend:%s"}, ++ ++ {Opt_del, "del=%s"}, ++ {Opt_del, "del:%s"}, ++ //{Opt_idel, "idel:%d"}, ++ {Opt_mod, "mod=%s"}, ++ {Opt_mod, "mod:%s"}, ++ //{Opt_imod, "imod:%d:%s"}, ++ ++ {Opt_dirwh, "dirwh=%d"}, ++ {Opt_dirwh, "dirwh:%d"}, ++ ++ {Opt_xino, "xino=%s"}, ++ {Opt_xino, "xino:%s"}, ++ {Opt_noxino, "noxino"}, ++ //{Opt_zxino, "zxino=%s"}, ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) ++ {Opt_plink, "plink"}, ++ {Opt_noplink, "noplink"}, ++#ifdef CONFIG_AUFS_DEBUG ++ {Opt_list_plink, "list_plink"}, ++#endif ++ {Opt_clean_plink, "clean_plink"}, ++#endif ++ ++ {Opt_udba, "udba=%s"}, ++ ++ {Opt_diropq_a, "diropq=always"}, ++ {Opt_diropq_a, "diropq=a"}, ++ {Opt_diropq_w, "diropq=whiteouted"}, ++ {Opt_diropq_w, "diropq=w"}, ++ ++ {Opt_warn_perm, "warn_perm"}, ++ {Opt_nowarn_perm, "nowarn_perm"}, ++ ++#ifdef CONFIG_AUFS_DLGT ++ {Opt_dlgt, "dlgt"}, ++ {Opt_nodlgt, "nodlgt"}, ++#endif ++ ++ {Opt_rdcache, "rdcache=%d"}, ++ {Opt_rdcache, "rdcache:%d"}, ++#if 0 ++ {Opt_findrw_dir, "findrw=dir"}, ++ {Opt_findrw_br, "findrw=br"}, ++ ++ {Opt_coo, "coo=%s"}, ++ ++ {Opt_deblk, "deblk=%d"}, ++ {Opt_deblk, "deblk:%d"}, ++ {Opt_nhash, "nhash=%d"}, ++ {Opt_nhash, "nhash:%d"}, ++#endif ++ ++ {Opt_br, "dirs=%s"}, ++ {Opt_ignore, "debug=%d"}, ++ {Opt_ignore, "delete=whiteout"}, ++ {Opt_ignore, "delete=all"}, ++ {Opt_ignore, "imap=%s"}, ++ ++ {Opt_err, NULL} ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++#define RW "rw" ++#define RO "ro" ++#define WH "wh" ++#define RR "rr" ++#define NoLinkWH "nolwh" ++ ++static match_table_t brperms = { ++ {AuBr_RR, RR}, ++ {AuBr_RO, RO}, ++ {AuBr_RW, RW}, ++ ++ {AuBr_RRWH, RR "+" WH}, ++ {AuBr_ROWH, RO "+" WH}, ++ {AuBr_RWNoLinkWH, RW "+" NoLinkWH}, ++ ++ {AuBr_ROWH, "nfsro"}, ++ {AuBr_RO, NULL} ++}; ++ ++static int br_perm_val(char *perm) ++{ ++ int val; ++ substring_t args[MAX_OPT_ARGS]; ++ ++ DEBUG_ON(!perm || !*perm); ++ LKTRTrace("perm %s\n", perm); ++ val = match_token(perm, brperms, args); ++ TraceErr(val); ++ return val; ++} ++ ++int br_perm_str(char *p, unsigned int len, int brperm) ++{ ++ struct match_token *bp = brperms; ++ ++ LKTRTrace("len %d, 0x%x\n", len, brperm); ++ ++ while (bp->pattern) { ++ if (bp->token == brperm) { ++ if (strlen(bp->pattern) < len) { ++ strcpy(p, bp->pattern); ++ return 0; ++ } else ++ return -E2BIG; ++ } ++ bp++; ++ } ++ ++ return -EIO; ++} ++ ++/* 
---------------------------------------------------------------------- */ ++ ++static match_table_t udbalevel = { ++ {AuFlag_UDBA_REVAL, "reval"}, ++#ifdef CONFIG_AUFS_HINOTIFY ++ {AuFlag_UDBA_INOTIFY, "inotify"}, ++#endif ++ {AuFlag_UDBA_NONE, "none"}, ++ {-1, NULL} ++}; ++ ++static int udba_val(char *str) ++{ ++ substring_t args[MAX_OPT_ARGS]; ++ return match_token(str, udbalevel, args); ++} ++ ++au_parser_pattern_t udba_str(int udba) ++{ ++ struct match_token *p = udbalevel; ++ while (p->pattern) { ++ if (p->token == udba) ++ return p->pattern; ++ p++; ++ } ++ BUG(); ++ return "??"; ++} ++ ++void udba_set(struct super_block *sb, unsigned int flg) ++{ ++ au_flag_clr(sb, AuMask_UDBA); ++ au_flag_set(sb, flg); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static match_table_t coolevel = { ++ {AuFlag_COO_LEAF, "leaf"}, ++ {AuFlag_COO_ALL, "all"}, ++ {AuFlag_COO_NONE, "none"}, ++ {-1, NULL} ++}; ++ ++#if 0 ++static int coo_val(char *str) ++{ ++ substring_t args[MAX_OPT_ARGS]; ++ return match_token(str, coolevel, args); ++} ++#endif ++ ++au_parser_pattern_t coo_str(int coo) ++{ ++ struct match_token *p = coolevel; ++ while (p->pattern) { ++ if (p->token == coo) ++ return p->pattern; ++ p++; ++ } ++ BUG(); ++ return "??"; ++} ++static void coo_set(struct super_block *sb, unsigned int flg) ++{ ++ au_flag_clr(sb, AuMask_COO); ++ au_flag_set(sb, flg); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static const int lkup_dirflags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY; ++ ++#ifdef CONFIG_AUFS_DEBUG ++static void dump_opts(struct opts *opts) ++{ ++ /* reduce stack space */ ++ union { ++ struct opt_add *add; ++ struct opt_del *del; ++ struct opt_mod *mod; ++ struct opt_xino *xino; ++ } u; ++ struct opt *opt; ++ ++ TraceEnter(); ++ ++ opt = opts->opt; ++ while (/* opt < opts_tail && */ opt->type != Opt_tail) { ++ switch (opt->type) { ++ case Opt_add: ++ u.add = &opt->add; ++ LKTRTrace("add {b%d, %s, 0x%x, %p}\n", ++ u.add->bindex, u.add->path, u.add->perm, ++ u.add->nd.dentry); ++ break; ++ case Opt_del: ++ case Opt_idel: ++ u.del = &opt->del; ++ LKTRTrace("del {%s, %p}\n", u.del->path, u.del->h_root); ++ break; ++ case Opt_mod: ++ case Opt_imod: ++ u.mod = &opt->mod; ++ LKTRTrace("mod {%s, 0x%x, %p}\n", ++ u.mod->path, u.mod->perm, u.mod->h_root); ++ break; ++ case Opt_append: ++ u.add = &opt->add; ++ LKTRTrace("append {b%d, %s, 0x%x, %p}\n", ++ u.add->bindex, u.add->path, u.add->perm, ++ u.add->nd.dentry); ++ break; ++ case Opt_prepend: ++ u.add = &opt->add; ++ LKTRTrace("prepend {b%d, %s, 0x%x, %p}\n", ++ u.add->bindex, u.add->path, u.add->perm, ++ u.add->nd.dentry); ++ break; ++ case Opt_dirwh: ++ LKTRTrace("dirwh %d\n", opt->dirwh); ++ break; ++ case Opt_rdcache: ++ LKTRTrace("rdcache %d\n", opt->rdcache); ++ break; ++ case Opt_xino: ++ u.xino = &opt->xino; ++ LKTRTrace("xino {%s %.*s}\n", ++ u.xino->path, DLNPair(u.xino->file->f_dentry)); ++ break; ++ case Opt_noxino: ++ LKTRLabel(noxino); ++ break; ++ case Opt_plink: ++ LKTRLabel(plink); ++ break; ++ case Opt_noplink: ++ LKTRLabel(noplink); ++ break; ++ case Opt_list_plink: ++ LKTRLabel(list_plink); ++ break; ++ case Opt_clean_plink: ++ LKTRLabel(clean_plink); ++ break; ++ case Opt_udba: ++ LKTRTrace("udba %d, %s\n", ++ opt->udba, udba_str(opt->udba)); ++ break; ++ case Opt_diropq_a: ++ LKTRLabel(diropq_a); ++ break; ++ case Opt_diropq_w: ++ LKTRLabel(diropq_w); ++ break; ++ case Opt_warn_perm: ++ LKTRLabel(warn_perm); ++ break; ++ case Opt_nowarn_perm: ++ 
LKTRLabel(nowarn_perm); ++ break; ++ case Opt_dlgt: ++ LKTRLabel(dlgt); ++ break; ++ case Opt_nodlgt: ++ LKTRLabel(nodlgt); ++ break; ++ case Opt_coo: ++ LKTRTrace("coo %d, %s\n", opt->coo, coo_str(opt->coo)); ++ break; ++ default: ++ BUG(); ++ } ++ opt++; ++ } ++} ++#else ++#define dump_opts(opts) /* */ ++#endif ++ ++void au_free_opts(struct opts *opts) ++{ ++ struct opt *opt; ++ ++ TraceEnter(); ++ ++ opt = opts->opt; ++ while (opt->type != Opt_tail) { ++ switch (opt->type) { ++ case Opt_add: ++ case Opt_append: ++ case Opt_prepend: ++ path_release(&opt->add.nd); ++ break; ++ case Opt_del: ++ case Opt_idel: ++ dput(opt->del.h_root); ++ break; ++ case Opt_mod: ++ case Opt_imod: ++ dput(opt->mod.h_root); ++ break; ++ case Opt_xino: ++ fput(opt->xino.file); ++ break; ++ } ++ opt++; ++ } ++} ++ ++static int opt_add(struct opt *opt, char *opt_str, struct super_block *sb, ++ aufs_bindex_t bindex) ++{ ++ int err; ++ struct opt_add *add = &opt->add; ++ char *p; ++ ++ LKTRTrace("%s, b%d\n", opt_str, bindex); ++ ++ add->bindex = bindex; ++ add->perm = AuBr_RO; ++ if (!bindex && !(sb->s_flags & MS_RDONLY)) ++ add->perm = AuBr_RW; ++#ifdef CONFIG_AUFS_COMPAT ++ add->perm = AuBr_RW; ++#endif ++ add->path = opt_str; ++ p = strchr(opt_str, '='); ++ if (unlikely(p)) { ++ *p++ = 0; ++ if (*p) ++ add->perm = br_perm_val(p); ++ } ++ ++ // LSM may detect it ++ // do not superio. ++ err = path_lookup(add->path, lkup_dirflags, &add->nd); ++ //err = -1; ++ if (!err) { ++ opt->type = Opt_add; ++ goto out; ++ } ++ Err("lookup failed %s (%d)\n", add->path, err); ++ err = -EINVAL; ++ ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++/* called without aufs lock */ ++int au_parse_opts(struct super_block *sb, char *str, struct opts *opts) ++{ ++ int err, n; ++ struct dentry *root; ++ struct opt *opt, *opt_tail; ++ char *opt_str; ++ substring_t args[MAX_OPT_ARGS]; ++ aufs_bindex_t bindex; ++ struct nameidata nd; ++ /* reduce stack space */ ++ union { ++ struct opt_del *del; ++ struct opt_mod *mod; ++ struct opt_xino *xino; ++ } u; ++ struct file *file; ++ ++ LKTRTrace("%s, nopts %d\n", str, opts->max_opt); ++ ++ root = sb->s_root; ++ err = 0; ++ bindex = 0; ++ opt = opts->opt; ++ opt_tail = opt + opts->max_opt - 1; ++ opt->type = Opt_tail; ++ while (!err && (opt_str = strsep(&str, ",")) && *opt_str) { ++ int token, skipped; ++ char *p; ++ err = -EINVAL; ++ token = match_token(opt_str, options, args); ++ LKTRTrace("%s, token %d, args[0]{%p, %p}\n", ++ opt_str, token, args[0].from, args[0].to); ++ ++ skipped = 0; ++ switch (token) { ++ case Opt_br: ++ err = 0; ++ while (!err && (opt_str = strsep(&args[0].from, ":")) ++ && *opt_str) { ++ err = opt_add(opt, opt_str, sb, bindex++); ++ //if (LktrCond) err = -1; ++ if (unlikely(!err && ++opt > opt_tail)) { ++ err = -E2BIG; ++ break; ++ } ++ opt->type = Opt_tail; ++ skipped = 1; ++ } ++ break; ++ case Opt_add: ++ if (unlikely(match_int(&args[0], &n))) { ++ Err("bad integer in %s\n", opt_str); ++ break; ++ } ++ bindex = n; ++ err = opt_add(opt, args[1].from, sb, bindex); ++ break; ++ case Opt_append: ++ case Opt_prepend: ++ err = opt_add(opt, args[0].from, sb, /*dummy bindex*/1); ++ if (!err) ++ opt->type = token; ++ break; ++ case Opt_del: ++ u.del = &opt->del; ++ u.del->path = args[0].from; ++ LKTRTrace("del path %s\n", u.del->path); ++ // LSM may detect it ++ // do not superio. 
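++ /* the lookup runs in the mounting user's own context rather than being delegated to aufs' superio workqueue, so LSM and permission checks apply to that user */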
++ err = path_lookup(u.del->path, lkup_dirflags, &nd); ++ if (unlikely(err)) { ++ Err("lookup failed %s (%d)\n", u.del->path, err); ++ break; ++ } ++ u.del->h_root = dget(nd.dentry); ++ path_release(&nd); ++ opt->type = token; ++ break; ++#if 0 ++ case Opt_idel: ++ u.del = &opt->del; ++ u.del->path = "(indexed)"; ++ if (unlikely(match_int(&args[0], &n))) { ++ Err("bad integer in %s\n", opt_str); ++ break; ++ } ++ bindex = n; ++ aufs_read_lock(root, !AUFS_I_RLOCK); ++ if (bindex < 0 || sbend(sb) < bindex) { ++ Err("out of bounds, %d\n", bindex); ++ aufs_read_unlock(root, !AUFS_I_RLOCK); ++ break; ++ } ++ err = 0; ++ u.del->h_root = dget(au_h_dptr_i(root, bindex)); ++ opt->type = token; ++ aufs_read_unlock(root, !AUFS_I_RLOCK); ++ break; ++#endif ++ ++ case Opt_mod: ++ u.mod = &opt->mod; ++ u.mod->path = args[0].from; ++ p = strchr(u.mod->path, '='); ++ if (unlikely(!p)) { ++ Err("no permssion %s\n", opt_str); ++ break; ++ } ++ *p++ = 0; ++ u.mod->perm = br_perm_val(p); ++ LKTRTrace("mod path %s, perm 0x%x, %s\n", ++ u.mod->path, u.mod->perm, p); ++ // LSM may detect it ++ // do not superio. ++ err = path_lookup(u.mod->path, lkup_dirflags, &nd); ++ if (unlikely(err)) { ++ Err("lookup failed %s (%d)\n", u.mod->path, err); ++ break; ++ } ++ u.mod->h_root = dget(nd.dentry); ++ path_release(&nd); ++ opt->type = token; ++ break; ++#if 0 ++ case Opt_imod: ++ u.mod = &opt->mod; ++ u.mod->path = "(indexed)"; ++ if (unlikely(match_int(&args[0], &n))) { ++ Err("bad integer in %s\n", opt_str); ++ break; ++ } ++ bindex = n; ++ aufs_read_lock(root, !AUFS_I_RLOCK); ++ if (bindex < 0 || sbend(sb) < bindex) { ++ Err("out of bounds, %d\n", bindex); ++ aufs_read_unlock(root, !AUFS_I_RLOCK); ++ break; ++ } ++ u.mod->perm = br_perm_val(args[1].from); ++ LKTRTrace("mod path %s, perm 0x%x, %s\n", ++ u.mod->path, u.mod->perm, args[1].from); ++ err = 0; ++ u.mod->h_root = dget(au_h_dptr_i(root, bindex)); ++ opt->type = token; ++ aufs_read_unlock(root, !AUFS_I_RLOCK); ++ break; ++#endif ++ case Opt_xino: ++ u.xino = &opt->xino; ++ file = xino_create(sb, args[0].from, /*silent*/0, ++ /*parent*/NULL); ++ err = PTR_ERR(file); ++ if (IS_ERR(file)) ++ break; ++ err = -EINVAL; ++ if (unlikely(file->f_dentry->d_sb == sb)) { ++ fput(file); ++ Err("%s must be outside\n", args[0].from); ++ break; ++ } ++ err = 0; ++ u.xino->file = file; ++ u.xino->path = args[0].from; ++ opt->type = token; ++ break; ++ ++ case Opt_dirwh: ++ if (unlikely(match_int(&args[0], &opt->dirwh))) ++ break; ++ err = 0; ++ opt->type = token; ++ break; ++ ++ case Opt_rdcache: ++ if (unlikely(match_int(&args[0], &opt->rdcache))) ++ break; ++ err = 0; ++ opt->type = token; ++ break; ++ ++ case Opt_noxino: ++ case Opt_plink: ++ case Opt_noplink: ++ case Opt_list_plink: ++ case Opt_clean_plink: ++ case Opt_diropq_a: ++ case Opt_diropq_w: ++ case Opt_warn_perm: ++ case Opt_nowarn_perm: ++ case Opt_dlgt: ++ case Opt_nodlgt: ++ err = 0; ++ opt->type = token; ++ break; ++ ++ case Opt_udba: ++ opt->udba = udba_val(args[0].from); ++ if (opt->udba >= 0) { ++ err = 0; ++ opt->type = token; ++ } ++ break; ++ ++#if 0 ++ case Opt_coo: ++ opt->coo = coo_val(args[0].from); ++ if (opt->coo >= 0) { ++ err = 0; ++ opt->type = token; ++ } ++ break; ++#endif ++ ++ case Opt_ignore: ++#ifndef CONFIG_AUFS_COMPAT ++ Warn("ignored %s\n", opt_str); ++#endif ++ skipped = 1; ++ err = 0; ++ break; ++ case Opt_err: ++ Err("unknown option %s\n", opt_str); ++ break; ++ } ++ ++ if (!err && !skipped) { ++ if (unlikely(++opt > opt_tail)) { ++ err = -E2BIG; ++ opt--; ++ opt->type = Opt_tail; 
++ break; ++ } ++ opt->type = Opt_tail; ++ } ++ } ++ ++ dump_opts(opts); ++ if (unlikely(err)) ++ au_free_opts(opts); ++ TraceErr(err); ++ return err; ++} ++ ++/* ++ * returns, ++ * plus: processed without an error ++ * zero: unprocessed ++ */ ++static int au_do_opt_simple(struct super_block *sb, struct opt *opt, ++ int remount, unsigned int *given) ++{ ++ int err; ++ struct aufs_sbinfo *sbinfo = stosi(sb); ++ ++ TraceEnter(); ++ ++ err = 1; /* handled */ ++ switch (opt->type) { ++ case Opt_udba: ++ udba_set(sb, opt->udba); ++ *given |= opt->udba; ++ break; ++ ++ case Opt_plink: ++ au_flag_set(sb, AuFlag_PLINK); ++ *given |= AuFlag_PLINK; ++ break; ++ case Opt_noplink: ++ if (au_flag_test(sb, AuFlag_PLINK)) ++ au_put_plink(sb); ++ au_flag_clr(sb, AuFlag_PLINK); ++ *given |= AuFlag_PLINK; ++ break; ++ case Opt_list_plink: ++ if (au_flag_test(sb, AuFlag_PLINK)) ++ au_list_plink(sb); ++ break; ++ case Opt_clean_plink: ++ if (au_flag_test(sb, AuFlag_PLINK)) ++ au_put_plink(sb); ++ break; ++ ++ case Opt_diropq_a: ++ au_flag_set(sb, AuFlag_ALWAYS_DIROPQ); ++ *given |= AuFlag_ALWAYS_DIROPQ; ++ break; ++ case Opt_diropq_w: ++ au_flag_clr(sb, AuFlag_ALWAYS_DIROPQ); ++ *given |= AuFlag_ALWAYS_DIROPQ; ++ break; ++ ++ case Opt_dlgt: ++ au_flag_set(sb, AuFlag_DLGT); ++ *given |= AuFlag_DLGT; ++ break; ++ case Opt_nodlgt: ++ au_flag_clr(sb, AuFlag_DLGT); ++ *given |= AuFlag_DLGT; ++ break; ++ ++ case Opt_warn_perm: ++ au_flag_set(sb, AuFlag_WARN_PERM); ++ *given |= AuFlag_WARN_PERM; ++ break; ++ case Opt_nowarn_perm: ++ au_flag_clr(sb, AuFlag_WARN_PERM); ++ *given |= AuFlag_WARN_PERM; ++ break; ++ ++ case Opt_coo: ++ coo_set(sb, opt->coo); ++ *given |= opt->coo; ++ break; ++ ++ case Opt_dirwh: ++ sbinfo->si_dirwh = opt->dirwh; ++ break; ++ ++ case Opt_rdcache: ++ sbinfo->si_rdcache = opt->rdcache * HZ; ++ break; ++ ++ default: ++ err = 0; ++ break; ++ } ++ ++ TraceErr(err); ++ return err; ++} ++ ++/* ++ * returns tri-state. 
++ * plus: processed without an error ++ * zero: unprocessed ++ * minus: error ++ */ ++static int au_do_opt_br(struct super_block *sb, struct opt *opt, int remount, ++ int *do_refresh) ++{ ++ int err; ++ ++ TraceEnter(); ++ ++ err = 0; ++ switch (opt->type) { ++ case Opt_append: ++ opt->add.bindex = sbend(sb) + 1; ++ goto add; ++ case Opt_prepend: ++ opt->add.bindex = 0; ++ add: ++ case Opt_add: ++ err = br_add(sb, &opt->add, remount); ++ if (!err) ++ *do_refresh = err = 1; ++ break; ++ ++ case Opt_del: ++ case Opt_idel: ++ err = br_del(sb, &opt->del, remount); ++ if (!err) ++ *do_refresh = err = 1; ++ break; ++ ++ case Opt_mod: ++ case Opt_imod: ++ err = br_mod(sb, &opt->mod, remount, do_refresh); ++ if (!err) ++ err = 1; ++ break; ++ } ++ ++ TraceErr(err); ++ return err; ++} ++ ++static int au_do_opt_xino(struct super_block *sb, struct opt *opt, int remount, ++ struct opt_xino **opt_xino) ++{ ++ int err; ++ ++ TraceEnter(); ++ ++ err = 0; ++ switch (opt->type) { ++ case Opt_xino: ++ err = xino_set(sb, &opt->xino, remount); ++ if (!err) ++ *opt_xino = &opt->xino; ++ break; ++ case Opt_noxino: ++ err = xino_clr(sb); ++ break; ++ } ++ ++ TraceErr(err); ++ return err; ++} ++ ++static int verify_opts(struct super_block *sb, int remount) ++{ ++ int err; ++ aufs_bindex_t bindex, bend; ++ struct aufs_branch *br; ++ struct dentry *root; ++ struct inode *dir; ++ unsigned int do_plink; ++ ++ TraceEnter(); ++ ++ if (unlikely(!(sb->s_flags & MS_RDONLY) ++ && !br_writable(sbr_perm(sb, 0)))) ++ Warn("first branch should be rw\n"); ++ ++ err = 0; ++ root = sb->s_root; ++ dir = sb->s_root->d_inode; ++ do_plink = au_flag_test(sb, AuFlag_PLINK); ++ bend = sbend(sb); ++ for (bindex = 0; !err && bindex <= bend; bindex++) { ++ struct inode *h_dir; ++ int skip; ++ ++ skip = 0; ++ h_dir = au_h_iptr_i(dir, bindex); ++ br = stobr(sb, bindex); ++ br_wh_read_lock(br); ++ switch (br->br_perm) { ++ case AuBr_RR: ++ case AuBr_RO: ++ case AuBr_RRWH: ++ case AuBr_ROWH: ++ skip = (!br->br_wh && !br->br_plink); ++ break; ++ ++ case AuBr_RWNoLinkWH: ++ skip = !br->br_wh; ++ if (skip) { ++ if (do_plink) ++ skip = !!br->br_plink; ++ else ++ skip = !br->br_plink; ++ } ++ break; ++ ++ case AuBr_RW: ++ skip = !!br->br_wh; ++ if (skip) { ++ if (do_plink) ++ skip = !!br->br_plink; ++ else ++ skip = !br->br_plink; ++ } ++ break; ++ ++ default: ++ BUG(); ++ } ++ br_wh_read_unlock(br); ++ ++ if (skip) ++ continue; ++ ++ hdir_lock(h_dir, dir, bindex); ++ br_wh_write_lock(br); ++ err = init_wh(au_h_dptr_i(root, bindex), br, ++ au_nfsmnt(sb, bindex), sb); ++ br_wh_write_unlock(br); ++ hdir_unlock(h_dir, dir, bindex); ++ } ++ ++ TraceErr(err); ++ return err; ++} ++ ++int au_do_opts_mount(struct super_block *sb, struct opts *opts) ++{ ++ int err, do_refresh; ++ struct inode *dir; ++ struct opt *opt; ++ unsigned int flags, given; ++ struct opt_xino *opt_xino; ++ aufs_bindex_t bend, bindex; ++ ++ TraceEnter(); ++ SiMustWriteLock(sb); ++ DiMustWriteLock(sb->s_root); ++ dir = sb->s_root->d_inode; ++ IiMustWriteLock(dir); ++ ++ err = 0; ++ given = 0; ++ opt_xino = NULL; ++ opt = opts->opt; ++ while (err >= 0 && opt->type != Opt_tail) ++ err = au_do_opt_simple(sb, opt++, /*remount*/0, &given); ++ if (err > 0) ++ err = 0; ++ else if (unlikely(err < 0)) ++ goto out; ++ ++ /* disable them temporary */ ++ flags = au_flag_test(sb, AuFlag_XINO | AuMask_UDBA | AuFlag_DLGT); ++ au_flag_clr(sb, AuFlag_XINO | AuFlag_DLGT); ++ udba_set(sb, AuFlag_UDBA_REVAL); ++ ++ do_refresh = 0; ++ opt = opts->opt; ++ while (err >= 0 && opt->type != Opt_tail) ++ err = 
au_do_opt_br(sb, opt++, /*remount*/0, &do_refresh); ++ if (err > 0) ++ err = 0; ++ else if (unlikely(err < 0)) ++ goto out; ++ ++ bend = sbend(sb); ++ if (unlikely(bend < 0)) { ++ err = -EINVAL; ++ Err("no branches\n"); ++ goto out; ++ } ++ ++ if (flags & AuFlag_XINO) ++ au_flag_set(sb, AuFlag_XINO); ++ opt = opts->opt; ++ while (!err && opt->type != Opt_tail) ++ err = au_do_opt_xino(sb, opt++, /*remount*/0, &opt_xino); ++ if (unlikely(err)) ++ goto out; ++ ++ //todo: test this error case. ++ err = verify_opts(sb, /*remount*/0); ++ DEBUG_ON(err); ++ if (unlikely(err)) ++ goto out; ++ ++ /* enable xino */ ++ if (au_flag_test(sb, AuFlag_XINO) && !opt_xino) { ++ struct file *xino_file = xino_def(sb); ++ err = PTR_ERR(xino_file); ++ if (IS_ERR(xino_file)) ++ goto out; ++ ++ err = 0; ++ for (bindex = 0; !err && bindex <= bend; bindex++) ++ err = xino_init(sb, bindex, xino_file, ++ /*do_test*/bindex); ++ fput(xino_file); ++ if (unlikely(err)) ++ goto out; ++ } ++ ++ /* restore hinotify */ ++ udba_set(sb, flags & AuMask_UDBA); ++ if (flags & AuFlag_UDBA_INOTIFY) ++ au_reset_hinotify(dir, au_hi_flags(dir, 1) & ~AUFS_HI_XINO); ++ ++ /* restore dlgt */ ++ if (flags & AuFlag_DLGT) ++ au_flag_set(sb, AuFlag_DLGT); ++ ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++int au_do_opts_remount(struct super_block *sb, struct opts *opts, ++ int *do_refresh, unsigned int *given) ++{ ++ int err, rerr; ++ struct inode *dir; ++ struct opt_xino *opt_xino; ++ struct opt *opt; ++ unsigned int dlgt; ++ ++ TraceEnter(); ++ SiMustWriteLock(sb); ++ DiMustWriteLock(sb->s_root); ++ dir = sb->s_root->d_inode; ++ IiMustWriteLock(dir); ++ //DEBUG_ON(au_flag_test(sb, AuFlag_UDBA_INOTIFY)); ++ ++ err = 0; ++ *do_refresh = 0; ++ *given = 0; ++ dlgt = au_flag_test(sb, AuFlag_DLGT); ++ opt_xino = NULL; ++ opt = opts->opt; ++ while (err >= 0 && opt->type != Opt_tail) { ++ err = au_do_opt_simple(sb, opt, /*remount*/1, given); ++ ++ /* disable it temporary */ ++ dlgt = au_flag_test(sb, AuFlag_DLGT); ++ au_flag_clr(sb, AuFlag_DLGT); ++ ++ if (!err) ++ err = au_do_opt_br(sb, opt, /*remount*/1, do_refresh); ++ if (!err) ++ err = au_do_opt_xino(sb, opt, /*remount*/1, &opt_xino); ++ ++ /* restore it */ ++ au_flag_set(sb, dlgt); ++ opt++; ++ } ++ if (err > 0) ++ err = 0; ++ TraceErr(err); ++ ++ /* go on if err */ ++ ++ //todo: test this error case. ++ au_flag_clr(sb, AuFlag_DLGT); ++ rerr = verify_opts(sb, /*remount*/1); ++ au_flag_set(sb, dlgt); ++ ++ /* they are handled by the caller */ ++ if (!*do_refresh) ++ *do_refresh = !!((*given & AuMask_UDBA) ++ || au_flag_test(sb, AuFlag_XINO)); ++ ++ TraceErr(err); ++ return err; ++} +diff --git a/fs/aufs/opts.h b/fs/aufs/opts.h +new file mode 100755 +index 0000000..16c1a6a +--- /dev/null ++++ b/fs/aufs/opts.h +@@ -0,0 +1,96 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
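au_parse_opts above walks the comma-separated mount data with strsep() and classifies every piece against a match_table_t before filling in the matching struct opt. A small user-space approximation of that flow, with a plain prefix table standing in for the kernel's match_token() and only a handful of options kept (everything here is illustrative):

#define _DEFAULT_SOURCE /* for strsep() on glibc */
#include <stdio.h>
#include <string.h>

enum { OPT_BR, OPT_UDBA, OPT_NOPLINK, OPT_ERR };

static const struct { int token; const char *prefix; } opts_tbl[] = {
    { OPT_BR,      "br:"     },
    { OPT_UDBA,    "udba="   },
    { OPT_NOPLINK, "noplink" },
};

static int classify(const char *o, const char **arg)
{
    for (size_t i = 0; i < sizeof(opts_tbl) / sizeof(opts_tbl[0]); i++) {
        size_t n = strlen(opts_tbl[i].prefix);
        if (!strncmp(o, opts_tbl[i].prefix, n)) {
            *arg = o + n;
            return opts_tbl[i].token;
        }
    }
    *arg = NULL;
    return OPT_ERR;
}

int main(void)
{
    char data[] = "br:/rw=rw:/ro=ro,udba=reval,noplink";
    char *p = data, *o;

    /* one option per strsep() pass, like au_parse_opts() */
    while ((o = strsep(&p, ",")) && *o) {
        const char *arg = NULL;
        int tok = classify(o, &arg);
        printf("token %d arg %s\n", tok, arg ? arg : "-");
    }
    return 0;
}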
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: opts.h,v 1.13 2007/05/14 06:27:18 sfjro Exp $ */ ++ ++#ifndef __AUFS_OPTS_H__ ++#define __AUFS_OPTS_H__ ++ ++#ifdef __KERNEL__ ++ ++#include <linux/fs.h> ++#include <linux/namei.h> ++#include <linux/version.h> ++#include <linux/aufs_type.h> ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) ++typedef const char* au_parser_pattern_t; ++#else ++typedef char* au_parser_pattern_t; ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct opt_add { ++ aufs_bindex_t bindex; ++ char *path; ++ int perm; ++ struct nameidata nd; ++}; ++ ++struct opt_del { ++ char *path; ++ struct dentry *h_root; ++}; ++ ++struct opt_mod { ++ char *path; ++ int perm; ++ struct dentry *h_root; ++}; ++ ++struct opt_xino { ++ char *path; ++ struct file *file; ++}; ++ ++struct opt { ++ int type; ++ union { ++ struct opt_xino xino; ++ struct opt_add add; ++ struct opt_del del; ++ struct opt_mod mod; ++ int dirwh; ++ int rdcache; ++ int deblk; ++ int nhash; ++ int udba; ++ int coo; ++ }; ++}; ++ ++struct opts { ++ struct opt *opt; ++ int max_opt; ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++int br_perm_str(char *p, unsigned int len, int brperm); ++au_parser_pattern_t udba_str(int udba); ++void udba_set(struct super_block *sb, unsigned int flg); ++//au_parser_pattern_t coo_str(int coo); ++void au_free_opts(struct opts *opts); ++int au_parse_opts(struct super_block *sb, char *str, struct opts *opts); ++int au_do_opts_mount(struct super_block *sb, struct opts *opts); ++int au_do_opts_remount(struct super_block *sb, struct opts *opts, ++ int *do_refresh, unsigned int *given); ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_OPTS_H__ */ +diff --git a/fs/aufs/plink.c b/fs/aufs/plink.c +new file mode 100755 +index 0000000..0e520af +--- /dev/null ++++ b/fs/aufs/plink.c +@@ -0,0 +1,331 @@ ++/* ++ * Copyright (C) 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: plink.c,v 1.4 2007/05/14 03:39:10 sfjro Exp $ */ ++ ++#include "aufs.h" ++ ++struct pseudo_link { ++ struct list_head list; ++ struct inode *inode; ++}; ++ ++#ifdef CONFIG_AUFS_DEBUG ++void au_list_plink(struct super_block *sb) ++{ ++ struct aufs_sbinfo *sbinfo; ++ struct list_head *plink_list; ++ struct pseudo_link *plink; ++ ++ TraceEnter(); ++ SiMustAnyLock(sb); ++ DEBUG_ON(!au_flag_test(sb, AuFlag_PLINK)); ++ ++ sbinfo = stosi(sb); ++ plink_list = &sbinfo->si_plink; ++ spin_lock(&sbinfo->si_plink_lock); ++ list_for_each_entry(plink, plink_list, list) ++ Dbg("%lu\n", plink->inode->i_ino); ++ spin_unlock(&sbinfo->si_plink_lock); ++} ++#endif ++ ++int au_is_plinked(struct super_block *sb, struct inode *inode) ++{ ++ int found; ++ struct aufs_sbinfo *sbinfo; ++ struct list_head *plink_list; ++ struct pseudo_link *plink; ++ ++ LKTRTrace("i%lu\n", inode->i_ino); ++ SiMustAnyLock(sb); ++ DEBUG_ON(!au_flag_test(sb, AuFlag_PLINK)); ++ ++ found = 0; ++ sbinfo = stosi(sb); ++ plink_list = &sbinfo->si_plink; ++ spin_lock(&sbinfo->si_plink_lock); ++ list_for_each_entry(plink, plink_list, list) ++ if (plink->inode == inode) { ++ found = 1; ++ break; ++ } ++ spin_unlock(&sbinfo->si_plink_lock); ++ return found; ++} ++ ++// 20 is max digits length of ulong 64 ++#define PLINK_NAME_LEN ((20 + 1) * 2) ++ ++static int plink_name(char *name, int len, struct inode *inode, ++ aufs_bindex_t bindex) ++{ ++ int rlen; ++ struct inode *h_inode; ++ ++ LKTRTrace("i%lu, b%d\n", inode->i_ino, bindex); ++ DEBUG_ON(len != PLINK_NAME_LEN); ++ h_inode = au_h_iptr_i(inode, bindex); ++ DEBUG_ON(!h_inode); ++ rlen = snprintf(name, len, "%lu.%lu", inode->i_ino, h_inode->i_ino); ++ DEBUG_ON(rlen >= len); ++ return rlen; ++} ++ ++struct dentry *lkup_plink(struct super_block *sb, aufs_bindex_t bindex, ++ struct inode *inode) ++{ ++ struct dentry *h_dentry, *h_parent; ++ struct aufs_branch *br; ++ struct inode *h_dir; ++ char tgtname[PLINK_NAME_LEN]; ++ int len; ++ struct lkup_args lkup; ++ ++ LKTRTrace("b%d, i%lu\n", bindex, inode->i_ino); ++ br = stobr(sb, bindex); ++ h_parent = br->br_plink; ++ DEBUG_ON(!h_parent); ++ h_dir = h_parent->d_inode; ++ DEBUG_ON(!h_dir); ++ ++ len = plink_name(tgtname, sizeof(tgtname), inode, bindex); ++ ++ // always superio. 
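++ /* the pseudo-link dir is maintained by aufs itself, so the lookup goes through sio_lkup_one(), which may run with root credentials via the superio workqueue */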
++ lkup.nfsmnt = au_do_nfsmnt(br->br_mnt); ++ lkup.dlgt = need_dlgt(sb); ++ hi_lock_whplink(h_dir); ++ h_dentry = sio_lkup_one(tgtname, h_parent, len, &lkup); ++ i_unlock(h_dir); ++ return h_dentry; ++} ++ ++static int do_whplink(char *tgt, int len, struct dentry *h_parent, ++ struct dentry *h_dentry, struct vfsmount *nfsmnt, ++ struct super_block *sb) ++{ ++ int err; ++ struct dentry *h_tgt; ++ struct inode *h_dir; ++ struct lkup_args lkup = { ++ .nfsmnt = nfsmnt, ++ .dlgt = need_dlgt(sb) ++ }; ++ ++ h_tgt = lkup_one(tgt, h_parent, len, &lkup); ++ err = PTR_ERR(h_tgt); ++ if (IS_ERR(h_tgt)) ++ goto out; ++ ++ err = 0; ++ h_dir = h_parent->d_inode; ++ if (unlikely(h_tgt->d_inode && h_tgt->d_inode != h_dentry->d_inode)) ++ err = vfsub_unlink(h_dir, h_tgt, lkup.dlgt); ++ if (!err && !h_tgt->d_inode) { ++ err = vfsub_link(h_dentry, h_dir, h_tgt, lkup.dlgt); ++ //inode->i_nlink++; ++ } ++ dput(h_tgt); ++ ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++struct do_whplink_args { ++ int *errp; ++ char *tgt; ++ int len; ++ struct dentry *h_parent; ++ struct dentry *h_dentry; ++ struct vfsmount *nfsmnt; ++ struct super_block *sb; ++}; ++ ++static void call_do_whplink(void *args) ++{ ++ struct do_whplink_args *a = args; ++ *a->errp = do_whplink(a->tgt, a->len, a->h_parent, a->h_dentry, ++ a->nfsmnt, a->sb); ++} ++ ++static int whplink(struct dentry *h_dentry, struct inode *inode, ++ aufs_bindex_t bindex, struct super_block *sb) ++{ ++ int err, len; ++ struct aufs_branch *br; ++ struct dentry *h_parent; ++ struct inode *h_dir; ++ char tgtname[PLINK_NAME_LEN]; ++ ++ LKTRTrace("%.*s\n", DLNPair(h_dentry)); ++ br = stobr(inode->i_sb, bindex); ++ h_parent = br->br_plink; ++ DEBUG_ON(!h_parent); ++ h_dir = h_parent->d_inode; ++ DEBUG_ON(!h_dir); ++ ++ len = plink_name(tgtname, sizeof(tgtname), inode, bindex); ++ ++ // always superio. ++ hi_lock_whplink(h_dir); ++ if (!is_au_wkq(current)) { ++ struct do_whplink_args args = { ++ .errp = &err, ++ .tgt = tgtname, ++ .len = len, ++ .h_parent = h_parent, ++ .h_dentry = h_dentry, ++ .nfsmnt = au_do_nfsmnt(br->br_mnt), ++ .sb = sb ++ }; ++ au_wkq_wait(call_do_whplink, &args, /*dlgt*/0); ++ } else ++ err = do_whplink(tgtname, len, h_parent, h_dentry, ++ au_do_nfsmnt(br->br_mnt), sb); ++ i_unlock(h_dir); ++ ++ TraceErr(err); ++ return err; ++} ++ ++void append_plink(struct super_block *sb, struct inode *inode, ++ struct dentry *h_dentry, aufs_bindex_t bindex) ++{ ++ struct aufs_sbinfo *sbinfo; ++ struct list_head *plink_list; ++ struct pseudo_link *plink; ++ int found, err, cnt; ++ ++ LKTRTrace("i%lu\n", inode->i_ino); ++ SiMustAnyLock(sb); ++ DEBUG_ON(!au_flag_test(sb, AuFlag_PLINK)); ++ ++ cnt = 0; ++ found = 0; ++ sbinfo = stosi(sb); ++ plink_list = &sbinfo->si_plink; ++ spin_lock(&sbinfo->si_plink_lock); ++ list_for_each_entry(plink, plink_list, list) { ++ cnt++; ++ if (plink->inode == inode) { ++ found = 1; ++ break; ++ } ++ } ++ ++ err = 0; ++ if (!found) { ++ struct pseudo_link *plink; ++ ++ plink = kmalloc(sizeof(*plink), GFP_ATOMIC); ++ if (plink) { ++ plink->inode = igrab(inode); ++ list_add(&plink->list, plink_list); ++ cnt++; ++ } else ++ err = -ENOMEM; ++ } ++ spin_unlock(&sbinfo->si_plink_lock); ++ ++ if (!err) ++ err = whplink(h_dentry, inode, bindex, sb); ++ ++ if (unlikely(cnt > 100)) ++ Warn1("unexpectedly many pseudo links, %d\n", cnt); ++ if (unlikely(err)) ++ Warn("err %d, damaged pseudo link. 
ignored.\n", err); ++} ++ ++static void do_put_plink(struct pseudo_link *plink, int do_del) ++{ ++ TraceEnter(); ++ ++ iput(plink->inode); ++ if (do_del) ++ list_del(&plink->list); ++ kfree(plink); ++} ++ ++void au_put_plink(struct super_block *sb) ++{ ++ struct aufs_sbinfo *sbinfo; ++ struct list_head *plink_list; ++ struct pseudo_link *plink, *tmp; ++ ++ TraceEnter(); ++ SiMustWriteLock(sb); ++ DEBUG_ON(!au_flag_test(sb, AuFlag_PLINK)); ++ ++ sbinfo = stosi(sb); ++ plink_list = &sbinfo->si_plink; ++ //spin_lock(&sbinfo->si_plink_lock); ++ list_for_each_entry_safe(plink, tmp, plink_list, list) ++ do_put_plink(plink, 0); ++ INIT_LIST_HEAD(plink_list); ++ //spin_unlock(&sbinfo->si_plink_lock); ++} ++ ++void half_refresh_plink(struct super_block *sb, aufs_bindex_t br_id) ++{ ++ struct aufs_sbinfo *sbinfo; ++ struct list_head *plink_list; ++ struct pseudo_link *plink, *tmp; ++ struct inode *inode; ++ aufs_bindex_t bstart, bend, bindex; ++ int do_put; ++ ++ TraceEnter(); ++ SiMustWriteLock(sb); ++ DEBUG_ON(!au_flag_test(sb, AuFlag_PLINK)); ++ ++ sbinfo = stosi(sb); ++ plink_list = &sbinfo->si_plink; ++ //spin_lock(&sbinfo->si_plink_lock); ++ list_for_each_entry_safe(plink, tmp, plink_list, list) { ++ do_put = 0; ++ inode = igrab(plink->inode); ++ ii_write_lock_child(inode); ++ bstart = ibstart(inode); ++ bend = ibend(inode); ++ if (bstart >= 0) { ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ if (!au_h_iptr_i(inode, bindex) ++ || itoid_index(inode, bindex) != br_id) ++ continue; ++ set_h_iptr(inode, bindex, NULL, 0); ++ do_put = 1; ++ break; ++ } ++ } else ++ do_put_plink(plink, 1); ++ ++ if (do_put) { ++ for (bindex = bstart; bindex <= bend; bindex++) ++ if (au_h_iptr_i(inode, bindex)) { ++ do_put = 0; ++ break; ++ } ++ if (do_put) ++ do_put_plink(plink, 1); ++ } ++ ii_write_unlock(inode); ++ iput(inode); ++ } ++ //spin_unlock(&sbinfo->si_plink_lock); ++} +diff --git a/fs/aufs/sbinfo.c b/fs/aufs/sbinfo.c +new file mode 100755 +index 0000000..55cb64c +--- /dev/null ++++ b/fs/aufs/sbinfo.c +@@ -0,0 +1,173 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
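plink.c above keeps the pseudo-linked inodes in a single spinlock-protected list, scanning it linearly on every test or append and warning once it grows past roughly a hundred entries. The same add-if-absent bookkeeping in ordinary C, with a mutex and bare inode numbers standing in for the kernel primitives (types and names are made up for this sketch):

#include <pthread.h>
#include <stdlib.h>

struct plink {
    struct plink *next;
    unsigned long ino; /* stands in for the struct inode pointer */
};

static struct plink *plink_head;
static pthread_mutex_t plink_lock = PTHREAD_MUTEX_INITIALIZER;

/* record ino unless it is already present; returns 0 on success */
static int plink_append(unsigned long ino)
{
    struct plink *p;
    int err = 0;

    pthread_mutex_lock(&plink_lock);
    for (p = plink_head; p; p = p->next)
        if (p->ino == ino)
            goto out; /* already recorded, nothing to do */

    p = malloc(sizeof(*p));
    if (p) {
        p->ino = ino;
        p->next = plink_head;
        plink_head = p;
    } else
        err = -1;
 out:
    pthread_mutex_unlock(&plink_lock);
    return err;
}

In the kernel code the new entry also pins the inode with igrab(), and after dropping the lock append_plink() creates a real hard link under the branch's hidden pseudo-link directory via whplink().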
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: sbinfo.c,v 1.30 2007/05/14 03:39:31 sfjro Exp $ */ ++ ++#include "aufs.h" ++ ++struct aufs_sbinfo *stosi(struct super_block *sb) ++{ ++ struct aufs_sbinfo *sbinfo; ++ sbinfo = sb->s_fs_info; ++ //DEBUG_ON(sbinfo->si_bend < 0); ++ return sbinfo; ++} ++ ++aufs_bindex_t sbend(struct super_block *sb) ++{ ++ SiMustAnyLock(sb); ++ return stosi(sb)->si_bend; ++} ++ ++struct aufs_branch *stobr(struct super_block *sb, aufs_bindex_t bindex) ++{ ++ SiMustAnyLock(sb); ++ DEBUG_ON(bindex < 0 || sbend(sb) < bindex ++ || !stosi(sb)->si_branch[0 + bindex]); ++ return stosi(sb)->si_branch[0 + bindex]; ++} ++ ++int au_sigen(struct super_block *sb) ++{ ++ SiMustAnyLock(sb); ++ return stosi(sb)->si_generation; ++} ++ ++int au_sigen_inc(struct super_block *sb) ++{ ++ int gen; ++ ++ SiMustWriteLock(sb); ++ gen = ++stosi(sb)->si_generation; ++ au_update_digen(sb->s_root); ++ au_update_iigen(sb->s_root->d_inode); ++ sb->s_root->d_inode->i_version++; ++ return gen; ++} ++ ++int find_bindex(struct super_block *sb, struct aufs_branch *br) ++{ ++ aufs_bindex_t bindex, bend; ++ ++ bend = sbend(sb); ++ for (bindex = 0; bindex <= bend; bindex++) ++ if (stobr(sb, bindex) == br) ++ return bindex; ++ return -1; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* dentry and super_block lock. call at entry point */ ++void aufs_read_lock(struct dentry *dentry, int flags) ++{ ++ si_read_lock(dentry->d_sb); ++ if (flags & AUFS_D_WLOCK) ++ di_write_lock_child(dentry); ++ else ++ di_read_lock_child(dentry, flags); ++} ++ ++void aufs_read_unlock(struct dentry *dentry, int flags) ++{ ++ if (flags & AUFS_D_WLOCK) ++ di_write_unlock(dentry); ++ else ++ di_read_unlock(dentry, flags); ++ si_read_unlock(dentry->d_sb); ++} ++ ++void aufs_write_lock(struct dentry *dentry) ++{ ++ //au_wkq_wait_nwtask(); ++ si_write_lock(dentry->d_sb); ++ di_write_lock_child(dentry); ++} ++ ++void aufs_write_unlock(struct dentry *dentry) ++{ ++ di_write_unlock(dentry); ++ si_write_unlock(dentry->d_sb); ++ //au_wkq_wait_nwtask(); ++} ++ ++void aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int isdir) ++{ ++ DEBUG_ON(d1 == d2 || d1->d_sb != d2->d_sb); ++ si_read_lock(d1->d_sb); ++ di_write_lock2_child(d1, d2, isdir); ++} ++ ++void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2) ++{ ++ DEBUG_ON(d1 == d2 || d1->d_sb != d2->d_sb); ++ di_write_unlock2(d1, d2); ++ si_read_unlock(d1->d_sb); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++aufs_bindex_t new_br_id(struct super_block *sb) ++{ ++ aufs_bindex_t br_id; ++ ++ TraceEnter(); ++ SiMustWriteLock(sb); ++ ++ while (1) { ++ br_id = ++stosi(sb)->si_last_br_id; ++ if (br_id && find_brindex(sb, br_id) < 0) ++ return br_id; ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_AUFS_SYSAUFS ++static int make_xino(struct seq_file *seq, struct sysaufs_args *args, ++ int *do_size) ++{ ++ int err; ++ struct super_block *sb = args->sb; ++ aufs_bindex_t bindex, bend; ++ struct file *xf; ++ struct inode *xi; ++ ++ TraceEnter(); ++ DEBUG_ON(args->index != SysaufsSb_XINO); ++ SiMustReadLock(sb); ++ ++ *do_size = 0; ++ err = seq_printf(seq, "%d %lu\n", sizeof(struct xino), ++ atomic_long_read(&stosi(sb)->si_xino)); ++ 
bend = sbend(sb); ++ for (bindex = 0; !err && bindex <= bend; bindex++) { ++ xf = stobr(sb, bindex)->br_xino; ++ xi = xf->f_dentry->d_inode; ++ err = seq_printf(seq, "%d: %d, %Lux%d %Ld\n", ++ bindex, file_count(xf), ++ (u64)xi->i_blocks, 1 << xi->i_blkbits, ++ i_size_read(xi)); ++ } ++ return err; ++} ++ ++sysaufs_op au_si_ops[] = { ++ [SysaufsSb_XINO] = make_xino ++}; ++#endif +diff --git a/fs/aufs/super.c b/fs/aufs/super.c +new file mode 100755 +index 0000000..c1123f8 +--- /dev/null ++++ b/fs/aufs/super.c +@@ -0,0 +1,716 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: super.c,v 1.50 2007/05/14 03:39:42 sfjro Exp $ */ ++ ++#include <linux/module.h> ++#include <linux/seq_file.h> ++#include <linux/statfs.h> ++#include "aufs.h" ++ ++/* ++ * super_operations ++ */ ++static struct inode *aufs_alloc_inode(struct super_block *sb) ++{ ++ struct aufs_icntnr *c; ++ ++ TraceEnter(); ++ ++ c = cache_alloc_icntnr(); ++ //if (LktrCond) {cache_free_icntnr(c); c = NULL;} ++ if (c) { ++ inode_init_once(&c->vfs_inode); ++ c->vfs_inode.i_version = 1; //sigen(sb); ++ c->iinfo.ii_hinode = NULL; ++ return &c->vfs_inode; ++ } ++ return NULL; ++} ++ ++static void aufs_destroy_inode(struct inode *inode) ++{ ++ LKTRTrace("i%lu\n", inode->i_ino); ++ au_iinfo_fin(inode); ++ cache_free_icntnr(container_of(inode, struct aufs_icntnr, vfs_inode)); ++} ++ ++//todo: how about merge with alloc_inode()? 
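aufs_alloc_inode and aufs_destroy_inode above use the usual embedding trick: the VFS inode sits inside struct aufs_icntnr next to the aufs-private info, and container_of() recovers the outer structure from the inode pointer when it is torn down. A minimal stand-alone illustration of that layout (simplified container_of, made-up struct names):

#include <stddef.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct vfs_inode { unsigned long ino; }; /* stand-in for struct inode */

struct icntnr {
    int private_state; /* stand-in for struct aufs_iinfo */
    struct vfs_inode vfs_inode;
};

static struct vfs_inode *alloc_inode(void)
{
    struct icntnr *c = calloc(1, sizeof(*c));
    return c ? &c->vfs_inode : NULL; /* hand out only the embedded inode */
}

static void destroy_inode(struct vfs_inode *inode)
{
    /* recover the enclosing container, as aufs_destroy_inode() does */
    free(container_of(inode, struct icntnr, vfs_inode));
}

The real code allocates from a dedicated kmem cache and finalises the per-inode iinfo before the container is returned to it.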
++static void aufs_read_inode(struct inode *inode) ++{ ++ int err; ++ ++ LKTRTrace("i%lu\n", inode->i_ino); ++ ++ err = au_iinfo_init(inode); ++ //if (LktrCond) err = -1; ++ if (!err) { ++ inode->i_version++; ++ inode->i_op = &aufs_iop; ++ inode->i_fop = &aufs_file_fop; ++ inode->i_mapping->a_ops = &aufs_aop; ++ return; /* success */ ++ } ++ ++ LKTRTrace("intializing inode info failed(%d)\n", err); ++ make_bad_inode(inode); ++} ++ ++int au_show_brs(struct seq_file *seq, struct super_block *sb) ++{ ++ int err; ++ aufs_bindex_t bindex, bend; ++ char a[16]; ++ struct dentry *root; ++ ++ TraceEnter(); ++ SiMustAnyLock(sb); ++ root = sb->s_root; ++ DiMustAnyLock(root); ++ ++ err = 0; ++ bend = sbend(sb); ++ for (bindex = 0; !err && bindex <= bend; bindex++) { ++ err = br_perm_str(a, sizeof(a), sbr_perm(sb, bindex)); ++ if (!err) ++ err = seq_path(seq, sbr_mnt(sb, bindex), ++ au_h_dptr_i(root, bindex), au_esc_chars); ++ if (err > 0) ++ err = seq_printf(seq, "=%s", a); ++ if (!err && bindex != bend) ++ err = seq_putc(seq, ':'); ++ } ++ ++ TraceErr(err); ++ return err; ++} ++ ++static int aufs_show_options(struct seq_file *m, struct vfsmount *mnt) ++{ ++ int err, n; ++ struct super_block *sb; ++ struct aufs_sbinfo *sbinfo; ++ struct dentry *root; ++ struct file *xino; ++ ++ TraceEnter(); ++ ++ sb = mnt->mnt_sb; ++ root = sb->s_root; ++ aufs_read_lock(root, !AUFS_I_RLOCK); ++ if (au_flag_test(sb, AuFlag_XINO)) { ++ err = seq_puts(m, ",xino="); ++ if (unlikely(err)) ++ goto out; ++ xino = stobr(sb, 0)->br_xino; ++ err = seq_path(m, xino->f_vfsmnt, xino->f_dentry, au_esc_chars); ++ if (unlikely(err <= 0)) ++ goto out; ++ err = 0; ++ ++#define Deleted "\\040(deleted)" ++ m->count -= sizeof(Deleted) - 1; ++ DEBUG_ON(memcmp(m->buf + m->count, Deleted, ++ sizeof(Deleted) - 1)); ++#undef Deleted ++ } else ++ err = seq_puts(m, ",noxino"); ++ ++ n = au_flag_test(sb, AuFlag_PLINK); ++ if (unlikely(!err && (AuDefFlags & AuFlag_PLINK) != n)) ++ err = seq_printf(m, ",%splink", n ? "" : "no"); ++ n = au_flag_test_udba(sb); ++ if (unlikely(!err && (AuDefFlags & AuMask_UDBA) != n)) ++ err = seq_printf(m, ",udba=%s", udba_str(n)); ++ n = au_flag_test(sb, AuFlag_ALWAYS_DIROPQ); ++ if (unlikely(!err && (AuDefFlags & AuFlag_ALWAYS_DIROPQ) != n)) ++ err = seq_printf(m, ",diropq=%c", n ? 'a' : 'w'); ++ n = au_flag_test(sb, AuFlag_DLGT); ++ if (unlikely(!err && (AuDefFlags & AuFlag_DLGT) != n)) ++ err = seq_printf(m, ",%sdlgt", n ? "" : "no"); ++ n = au_flag_test(sb, AuFlag_WARN_PERM); ++ if (unlikely(!err && (AuDefFlags & AuFlag_WARN_PERM) != n)) ++ err = seq_printf(m, ",%swarn_perm", n ? 
"" : "no"); ++ ++ sbinfo = stosi(sb); ++ n = sbinfo->si_dirwh; ++ if (unlikely(!err && n != AUFS_DIRWH_DEF)) ++ err = seq_printf(m, ",dirwh=%d", n); ++ n = sbinfo->si_rdcache / HZ; ++ if (unlikely(!err && n != AUFS_RDCACHE_DEF)) ++ err = seq_printf(m, ",rdcache=%d", n); ++#if 0 ++ n = au_flag_test_coo(sb); ++ if (unlikely(!err && (AuDefFlags & AuMask_COO) != n)) ++ err = seq_printf(m, ",coo=%s", coo_str(n)); ++#endif ++ ++ if (!err && !sysaufs_brs) { ++#ifdef CONFIG_AUFS_COMPAT ++ err = seq_puts(m, ",dirs="); ++#else ++ err = seq_puts(m, ",br:"); ++#endif ++ if (!err) ++ err = au_show_brs(m, sb); ++ } ++ ++ out: ++ aufs_read_unlock(root, !AUFS_I_RLOCK); ++ TraceErr(err); ++ if (err) ++ err = -E2BIG; ++ TraceErr(err); ++ return err; ++} ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,18) ++#define StatfsLock(d) aufs_read_lock((d)->d_sb->s_root, 0) ++#define StatfsUnlock(d) aufs_read_unlock((d)->d_sb->s_root, 0) ++#define StatfsArg(d) au_h_dptr((d)->d_sb->s_root) ++#define StatfsHInode(d) (StatfsArg(d)->d_inode) ++#define StatfsSb(d) ((d)->d_sb) ++static int aufs_statfs(struct dentry *arg, struct kstatfs *buf) ++#else ++#define StatfsLock(s) si_read_lock(s) ++#define StatfsUnlock(s) si_read_unlock(s) ++#define StatfsArg(s) sbr_sb(s, 0) ++#define StatfsHInode(s) (StatfsArg(s)->s_root->d_inode) ++#define StatfsSb(s) (s) ++static int aufs_statfs(struct super_block *arg, struct kstatfs *buf) ++#endif ++{ ++ int err; ++ ++ TraceEnter(); ++ ++ StatfsLock(arg); ++ err = vfsub_statfs(StatfsArg(arg), buf, need_dlgt(StatfsSb(arg))); ++ //if (LktrCond) err = -1; ++ StatfsUnlock(arg); ++ if (!err) { ++ //buf->f_type = AUFS_SUPER_MAGIC; ++ buf->f_type = 0; ++ buf->f_namelen -= AUFS_WH_PFX_LEN; ++ memset(&buf->f_fsid, 0, sizeof(buf->f_fsid)); ++ } ++ //buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; ++ ++ TraceErr(err); ++ return err; ++} ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,18) || defined(UbuntuEdgy17Umount18) ++#define UmountBeginSb(mnt) (mnt)->mnt_sb ++static void aufs_umount_begin(struct vfsmount *arg, int flags) ++#else ++#define UmountBeginSb(sb) sb ++static void aufs_umount_begin(struct super_block *arg) ++#endif ++{ ++ struct super_block *sb = UmountBeginSb(arg); ++ ++ if (unlikely(!stosi(sb))) ++ return; ++ ++ //au_wkq_wait_nwtask(); ++ si_write_lock(sb); ++ if (au_flag_test(sb, AuFlag_PLINK)) { ++ au_put_plink(sb); ++ //kobj_umount(stosi(sb)); ++ } ++#if 0 ++ if (unlikely(au_flag_test(sb, AuFlag_UDBA_INOTIFY))) ++ shrink_dcache_sb(sb); ++#endif ++ si_write_unlock(sb); ++} ++ ++static void free_sbinfo(struct aufs_sbinfo *sbinfo) ++{ ++ TraceEnter(); ++ DEBUG_ON(!sbinfo ++ || !list_empty(&sbinfo->si_plink)); ++ ++ free_branches(sbinfo); ++ kfree(sbinfo->si_branch); ++ kfree(sbinfo); ++} ++ ++/* final actions when unmounting a file system */ ++static void aufs_put_super(struct super_block *sb) ++{ ++ struct aufs_sbinfo *sbinfo; ++ ++ TraceEnter(); ++ ++ sbinfo = stosi(sb); ++ if (unlikely(!sbinfo)) ++ return; ++ ++ sysaufs_del(sbinfo); ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) && !defined(UbuntuEdgy17Umount18) ++ // umount_begin() may not be called. ++ aufs_umount_begin(sb); ++#endif ++ free_sbinfo(sbinfo); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * refresh directories at remount time. 
++ */ ++static int do_refresh_dir(struct dentry *dentry, unsigned int flags) ++{ ++ int err; ++ struct dentry *parent; ++ struct inode *inode; ++ ++ LKTRTrace("%.*s\n", DLNPair(dentry)); ++ inode = dentry->d_inode; ++ DEBUG_ON(!inode || !S_ISDIR(inode->i_mode)); ++ ++ di_write_lock_child(dentry); ++ parent = dget_parent(dentry); ++ di_read_lock_parent(parent, AUFS_I_RLOCK); ++ err = au_refresh_hdentry(dentry, S_IFDIR); ++ if (err >= 0) { ++ err = au_refresh_hinode(inode, dentry); ++ if (!err) ++ au_reset_hinotify(inode, flags); ++ } ++ if (unlikely(err)) ++ Err("unrecoverable error %d\n", err); ++ di_read_unlock(parent, AUFS_I_RLOCK); ++ dput(parent); ++ di_write_unlock(dentry); ++ ++ TraceErr(err); ++ return err; ++} ++ ++static int test_dir(struct dentry *dentry, void *arg) ++{ ++ return S_ISDIR(dentry->d_inode->i_mode); ++} ++ ++static int refresh_dir(struct dentry *root, int sgen) ++{ ++ int err, i, j, ndentry; ++ const unsigned int flags = au_hi_flags(root->d_inode, /*isdir*/1); ++ struct au_dcsub_pages dpages; ++ struct au_dpage *dpage; ++ struct dentry **dentries; ++ ++ LKTRTrace("sgen %d\n", sgen); ++ SiMustWriteLock(root->d_sb); ++ DEBUG_ON(au_digen(root) != sgen); ++ DiMustWriteLock(root); ++ ++ err = au_dpages_init(&dpages, GFP_KERNEL); ++ if (unlikely(err)) ++ goto out; ++ err = au_dcsub_pages(&dpages, root, test_dir, NULL); ++ if (unlikely(err)) ++ goto out_dpages; ++ ++ DiMustNoWaiters(root); ++ IiMustNoWaiters(root->d_inode); ++ di_write_unlock(root); ++ for (i = 0; !err && i < dpages.ndpage; i++) { ++ dpage = dpages.dpages + i; ++ dentries = dpage->dentries; ++ ndentry = dpage->ndentry; ++ for (j = 0; !err && j < ndentry; j++) { ++ struct dentry *d; ++ d = dentries[j]; ++ DEBUG_ON(!S_ISDIR(d->d_inode->i_mode) ++ || IS_ROOT(d) ++ || au_digen(d->d_parent) != sgen); ++ if (au_digen(d) != sgen) ++ err = do_refresh_dir(d, flags); ++ } ++ } ++ di_write_lock_child(root); /* aufs_write_lock() calls ..._child() */ ++ ++ out_dpages: ++ au_dpages_free(&dpages); ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++/* stop extra interpretation of errno in mount(8), and strange error messages */ ++static int cvt_err(int err) ++{ ++ TraceErr(err); ++ ++ switch (err) { ++ case -ENOENT: ++ case -ENOTDIR: ++ case -EEXIST: ++ case -EIO: ++ err = -EINVAL; ++ } ++ return err; ++} ++ ++/* protected by s_umount */ ++static int aufs_remount_fs(struct super_block *sb, int *flags, char *data) ++{ ++ int err, do_refresh; ++ struct dentry *root; ++ struct inode *inode; ++ struct opts opts; ++ unsigned int given, dlgt; ++ ++ //au_debug_on(); ++ LKTRTrace("flags 0x%x, data %s, len %d\n", ++ *flags, data ? data : "NULL", data ? 
strlen(data) : 0); ++ ++ err = 0; ++ if (unlikely(!data || !*data)) ++ goto out; /* success */ ++ ++ err = -ENOMEM; ++ memset(&opts, 0, sizeof(opts)); ++ opts.opt = (void*)__get_free_page(GFP_KERNEL); ++ //if (LktrCond) {free_page((unsigned long)opts.opt); opts.opt = NULL;} ++ if (unlikely(!opts.opt)) ++ goto out; ++ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt); ++ ++ /* parse it before aufs lock */ ++ err = au_parse_opts(sb, data, &opts); ++ //if (LktrCond) {au_free_opts(&opts); err = -1;} ++ if (unlikely(err)) ++ goto out_opts; ++ ++ root = sb->s_root; ++ inode = root->d_inode; ++ i_lock(inode); ++ aufs_write_lock(root); ++ ++ //DbgSleep(3); ++ ++ /* au_do_opts() may return an error */ ++ do_refresh = 0; ++ given = 0; ++ err = au_do_opts_remount(sb, &opts, &do_refresh, &given); ++ //if (LktrCond) err = -1; ++ au_free_opts(&opts); ++ ++ if (do_refresh) { ++ int rerr; ++ struct aufs_sbinfo *sbinfo; ++ ++ dlgt = au_flag_test(sb, AuFlag_DLGT); ++ au_flag_clr(sb, AuFlag_DLGT); ++ au_sigen_inc(sb); ++ au_reset_hinotify(inode, au_hi_flags(inode, /*isdir*/1)); ++ sbinfo = stosi(sb); ++ sbinfo->si_failed_refresh_dirs = 0; ++ rerr = refresh_dir(root, au_sigen(sb)); ++ if (unlikely(rerr)) { ++ sbinfo->si_failed_refresh_dirs = 1; ++ Warn("Refreshing directories failed, ignores (%d)\n", ++ rerr); ++ } ++ au_cpup_attr_all(inode); ++ au_flag_set(sb, dlgt); ++ } ++ ++ aufs_write_unlock(root); ++ i_unlock(inode); ++ /* braces are added to stop a warning */ ++ if (do_refresh) { ++ sysaufs_notify_remount(); ++ } ++ ++ out_opts: ++ free_page((unsigned long)opts.opt); ++ out: ++ err = cvt_err(err); ++ TraceErr(err); ++ //au_debug_off(); ++ return err; ++} ++ ++static struct super_operations aufs_sop = { ++ .alloc_inode = aufs_alloc_inode, ++ .destroy_inode = aufs_destroy_inode, ++ .read_inode = aufs_read_inode, ++ //.dirty_inode = aufs_dirty_inode, ++ //.write_inode = aufs_write_inode, ++ //void (*put_inode) (struct inode *); ++ .drop_inode = generic_delete_inode, ++ //.delete_inode = aufs_delete_inode, ++ //.clear_inode = aufs_clear_inode, ++ ++ .show_options = aufs_show_options, ++ .statfs = aufs_statfs, ++ ++ .put_super = aufs_put_super, ++ //void (*write_super) (struct super_block *); ++ //int (*sync_fs)(struct super_block *sb, int wait); ++ //void (*write_super_lockfs) (struct super_block *); ++ //void (*unlockfs) (struct super_block *); ++ .remount_fs = aufs_remount_fs, ++ // depends upon umount flags. also use put_super() (< 2.6.18) ++ .umount_begin = aufs_umount_begin ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * at first mount time. 
++ */ ++ ++static int alloc_sbinfo(struct super_block *sb) ++{ ++ struct aufs_sbinfo *sbinfo; ++ ++ TraceEnter(); ++ ++ sbinfo = kmalloc(sizeof(*sbinfo), GFP_KERNEL); ++ //if (LktrCond) {kfree(sbinfo); sbinfo = NULL;} ++ if (unlikely(!sbinfo)) ++ goto out; ++ sbinfo->si_branch = kzalloc(sizeof(*sbinfo->si_branch), GFP_KERNEL); ++ //if (LktrCond) {kfree(sbinfo->si_branch); sbinfo->si_branch = NULL;} ++ if (unlikely(!sbinfo->si_branch)) { ++ kfree(sbinfo); ++ goto out; ++ } ++ rw_init_wlock(&sbinfo->si_rwsem); ++ sbinfo->si_bend = -1; ++ atomic_long_set(&sbinfo->si_xino, AUFS_FIRST_INO); ++ spin_lock_init(&sbinfo->si_plink_lock); ++ INIT_LIST_HEAD(&sbinfo->si_plink); ++ init_lvma(sbinfo); ++ sbinfo->si_generation = 0; ++ sbinfo->si_last_br_id = 0; ++ sbinfo->si_failed_refresh_dirs = 0; ++ sbinfo->si_flags = 0; ++ sbinfo->si_dirwh = AUFS_DIRWH_DEF; ++ sbinfo->si_rdcache = AUFS_RDCACHE_DEF * HZ; ++ //atomic_set(&sbinfo->si_hinotify, 0); ++ //init_waitqueue_head(&sbinfo->si_hinotify_wq); ++ ++ sb->s_fs_info = sbinfo; ++ au_flag_set(sb, AuDefFlags); ++#ifdef ForceInotify ++ udba_set(sb, AuFlag_UDBA_INOTIFY); ++#endif ++#ifdef ForceDlgt ++ au_flag_set(sb, AuFlag_DLGT); ++#endif ++#ifdef ForceNoPlink ++ au_flag_clr(sb, AuFlag_PLINK); ++#endif ++ return 0; /* success */ ++ ++ out: ++ TraceErr(-ENOMEM); ++ return -ENOMEM; ++} ++ ++static int alloc_root(struct super_block *sb) ++{ ++ int err; ++ struct inode *inode; ++ struct dentry *root; ++ ++ TraceEnter(); ++ ++ err = -ENOMEM; ++ inode = iget(sb, AUFS_ROOT_INO); ++ //if (LktrCond) {iput(inode); inode = NULL;} ++ if (unlikely(!inode)) ++ goto out; ++ err = PTR_ERR(inode); ++ if (IS_ERR(inode)) ++ goto out; ++ err = -ENOMEM; ++ if (unlikely(is_bad_inode(inode))) ++ goto out_iput; ++ ++ root = d_alloc_root(inode); ++ //if (LktrCond) {igrab(inode); dput(root); root = NULL;} ++ if (unlikely(!root)) ++ goto out_iput; ++ err = PTR_ERR(root); ++ if (IS_ERR(root)) ++ goto out_iput; ++ ++ err = au_alloc_dinfo(root); ++ //if (LktrCond){rw_write_unlock(&dtodi(root)->di_rwsem);err=-1;} ++ if (!err) { ++ sb->s_root = root; ++ return 0; /* success */ ++ } ++ dput(root); ++ goto out; /* do not iput */ ++ ++ out_iput: ++ iput(inode); ++ out: ++ TraceErr(err); ++ return err; ++ ++} ++ ++static int aufs_fill_super(struct super_block *sb, void *raw_data, int silent) ++{ ++ int err; ++ struct dentry *root; ++ struct inode *inode; ++ struct opts opts; ++ char *arg = raw_data; ++ ++ //au_debug_on(); ++ if (unlikely(!arg || !*arg)) { ++ err = -EINVAL; ++ Err("no arg\n"); ++ goto out; ++ } ++ LKTRTrace("%s, silent %d\n", arg, silent); ++ ++ err = -ENOMEM; ++ memset(&opts, 0, sizeof(opts)); ++ opts.opt = (void*)__get_free_page(GFP_KERNEL); ++ //if (LktrCond) {free_page((unsigned long)opts.opt); opts.opt = NULL;} ++ if (unlikely(!opts.opt)) ++ goto out; ++ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt); ++ ++ err = alloc_sbinfo(sb); ++ //if (LktrCond) {si_write_unlock(sb);free_sbinfo(stosi(sb));err=-1;} ++ if (unlikely(err)) ++ goto out_opts; ++ SiMustWriteLock(sb); ++ /* all timestamps always follow the ones on the branch */ ++ sb->s_flags |= MS_NOATIME | MS_NODIRATIME; ++ sb->s_op = &aufs_sop; ++ au_init_export_op(sb); ++ //err = kobj_mount(stosi(sb)); ++ //if (err) ++ //goto out_info; ++ ++ err = alloc_root(sb); ++ //if (LktrCond) {rw_write_unlock(&dtodi(sb->s_root)->di_rwsem); ++ //dput(sb->s_root);sb->s_root=NULL;err=-1;} ++ if (unlikely(err)) { ++ DEBUG_ON(sb->s_root); ++ si_write_unlock(sb); ++ goto out_info; ++ } ++ root = sb->s_root; ++ DiMustWriteLock(root); ++ 
inode = root->d_inode; ++ inode->i_nlink = 2; ++ ++ /* ++ * actually we can parse options regardless aufs lock here. ++ * but at remount time, parsing must be done before aufs lock. ++ * so we follow the same rule. ++ */ ++ ii_write_lock_parent(inode); ++ aufs_write_unlock(root); ++ err = au_parse_opts(sb, arg, &opts); ++ //if (LktrCond) {au_free_opts(&opts); err = -1;} ++ if (unlikely(err)) ++ goto out_root; ++ ++ /* lock vfs_inode first, then aufs. */ ++ i_lock(inode); ++ inode->i_op = &aufs_dir_iop; ++ inode->i_fop = &aufs_dir_fop; ++ aufs_write_lock(root); ++ ++ sb->s_maxbytes = 0; ++ err = au_do_opts_mount(sb, &opts); ++ //if (LktrCond) err = -1; ++ au_free_opts(&opts); ++ if (unlikely(err)) ++ goto out_unlock; ++ DEBUG_ON(!sb->s_maxbytes); ++ ++ //DbgDentry(root); ++ aufs_write_unlock(root); ++ i_unlock(inode); ++ //DbgSb(sb); ++ goto out_opts; /* success */ ++ ++ out_unlock: ++ aufs_write_unlock(root); ++ i_unlock(inode); ++ out_root: ++ dput(root); ++ sb->s_root = NULL; ++ out_info: ++ free_sbinfo(stosi(sb)); ++ sb->s_fs_info = NULL; ++ out_opts: ++ free_page((unsigned long)opts.opt); ++ out: ++ TraceErr(err); ++ err = cvt_err(err); ++ TraceErr(err); ++ //au_debug_off(); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,18) ++static int aufs_get_sb(struct file_system_type *fs_type, int flags, ++ const char *dev_name, void *raw_data, ++ struct vfsmount *mnt) ++{ ++ int err; ++ ++ /* all timestamps always follow the ones on the branch */ ++ //mnt->mnt_flags |= MNT_NOATIME | MNT_NODIRATIME; ++ err = get_sb_nodev(fs_type, flags, raw_data, aufs_fill_super, mnt); ++ if (!err) { ++ struct aufs_sbinfo *sbinfo = stosi(mnt->mnt_sb); ++ sbinfo->si_mnt = mnt; ++ sysaufs_add(sbinfo); ++ } ++ return err; ++} ++#else ++static struct super_block *aufs_get_sb(struct file_system_type *fs_type, ++ int flags, const char *dev_name, ++ void *raw_data) ++{ ++ return get_sb_nodev(fs_type, flags, raw_data, aufs_fill_super); ++} ++#endif ++ ++struct file_system_type aufs_fs_type = { ++ .name = AUFS_FSTYPE, ++ .fs_flags = FS_REVAL_DOT, // for UDBA and NFS branch ++ .get_sb = aufs_get_sb, ++ .kill_sb = generic_shutdown_super, ++ //no need to __module_get() and module_put(). ++ .owner = THIS_MODULE, ++}; +diff --git a/fs/aufs/super.h b/fs/aufs/super.h +new file mode 100755 +index 0000000..56ddee1 +--- /dev/null ++++ b/fs/aufs/super.h +@@ -0,0 +1,339 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: super.h,v 1.44 2007/05/14 03:39:54 sfjro Exp $ */ ++ ++#ifndef __AUFS_SUPER_H__ ++#define __AUFS_SUPER_H__ ++ ++#ifdef __KERNEL__ ++ ++#include <linux/fs.h> ++#include <linux/version.h> ++#include <linux/aufs_type.h> ++#include "misc.h" ++#include "sysaufs.h" ++ ++#ifdef CONFIG_AUFS_SYSAUFS ++/* entries under sysfs per mount-point */ ++enum {SysaufsSb_XINO, /* SysaufsSb_PLINK, */ SysaufsSb_Last}; ++struct sysaufs_sbinfo { ++ au_subsys_t subsys; ++ struct sysaufs_entry array[SysaufsSb_Last]; ++}; ++extern sysaufs_op au_si_ops[]; ++#else ++struct sysaufs_sbinfo {}; ++#endif ++ ++struct aufs_sbinfo { ++ struct aufs_rwsem si_rwsem; ++ ++ /* branch management */ ++ /* wrap around attack by superuser? No. */ ++ int si_generation; ++ ++ /* ++ * set true when refresh_dirs() at remount time failed. ++ * then try refreshing dirs at access time again. ++ * if it is false, refreshing dirs at access time is unnecesary ++ */ ++ unsigned int si_failed_refresh_dirs:1; ++ ++ aufs_bindex_t si_bend; ++ aufs_bindex_t si_last_br_id; ++ struct aufs_branch **si_branch; ++ ++ /* mount flags */ ++ unsigned int si_flags; ++ ++ /* external inode number table */ ++ atomic_long_t si_xino; // time bomb ++ //struct file *si_xino_bmap; ++ ++ /* readdir cache time, max, in HZ */ ++ unsigned long si_rdcache; ++ ++ /* ++ * If the number of whiteouts are larger than si_dirwh, leave all of ++ * them after rename_whtmp to reduce the cost of rmdir(2). ++ * future fsck.aufs or kernel thread will remove them later. ++ * Otherwise, remove all whiteouts and the dir in rmdir(2). 
++ */ ++ unsigned int si_dirwh; ++ ++ /* pseudo_link list */ // dirty ++ spinlock_t si_plink_lock; ++ struct list_head si_plink; ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,18) ++ /* super_blocks list is not exported */ ++ struct list_head si_list; ++ struct vfsmount *si_mnt; /* no get/put */ ++#endif ++ ++ /* sysfs */ ++ struct sysaufs_sbinfo si_sysaufs; ++ ++#ifdef CONFIG_AUFS_HINOTIFY ++ /* hinotify */ ++ //atomic_t si_hinotify; ++ //wait_queue_head_t si_hinotify_wq; ++#endif ++ ++#ifdef CONFIG_AUFS_ROBR ++ /* locked vma list for mmap() */ // very dirty ++ spinlock_t si_lvma_lock; ++ struct list_head si_lvma; ++#endif ++}; ++ ++/* an entry in a xino file */ ++struct xino { ++ ino_t ino; ++ //__u32 h_gen; ++} __attribute__ ((packed)); ++ ++//#define AuXino_INVALID_HGEN (-1) ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* Mount flags */ ++#define AuFlag_XINO 1 ++#define AuFlag_ZXINO (1 << 1) ++#define AuFlag_PLINK (1 << 2) ++#define AuFlag_UDBA_NONE (1 << 3) ++#define AuFlag_UDBA_REVAL (1 << 4) ++#define AuFlag_UDBA_INOTIFY (1 << 5) ++#define AuFlag_WARN_PERM (1 << 6) ++#define AuFlag_COO_NONE (1 << 7) ++#define AuFlag_COO_LEAF (1 << 8) ++#define AuFlag_COO_ALL (1 << 9) ++#define AuFlag_ALWAYS_DIROPQ (1 << 10) ++#define AuFlag_DLGT (1 << 11) ++ ++#define AuMask_UDBA (AuFlag_UDBA_NONE | AuFlag_UDBA_REVAL \ ++ | AuFlag_UDBA_INOTIFY) ++#define AuMask_COO (AuFlag_COO_NONE | AuFlag_COO_LEAF \ ++ | AuFlag_COO_ALL) ++ ++#ifdef CONFIG_AUFS_COMPAT ++#define AuDefFlag_DIROPQ AuFlag_ALWAYS_DIROPQ ++#else ++#define AuDefFlag_DIROPQ 0 ++#endif ++ ++#define AuDefFlags_COMM (AuFlag_XINO | AuFlag_UDBA_REVAL | AuFlag_WARN_PERM \ ++ | AuFlag_COO_NONE | AuDefFlag_DIROPQ) ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) ++#define AuDefFlags (AuDefFlags_COMM | AuFlag_PLINK) ++#else ++#define AuDefFlags AuDefFlags_COMM ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* flags for aufs_read_lock()/di_read_lock() */ ++#define AUFS_D_WLOCK 1 ++#define AUFS_I_RLOCK 2 ++#define AUFS_I_WLOCK 4 ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* super.c */ ++int au_show_brs(struct seq_file *seq, struct super_block *sb); ++ ++/* xino.c */ ++struct file *xino_create(struct super_block *sb, char *fname, int silent, ++ struct dentry *parent); ++ino_t xino_new_ino(struct super_block *sb); ++int xino_write0(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino); ++int xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, ++ struct xino *xino); ++int xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, ++ struct xino *xino); ++int xino_init(struct super_block *sb, aufs_bindex_t bindex, ++ struct file *base_file, int do_test); ++struct opt_xino; ++int xino_set(struct super_block *sb, struct opt_xino *xino, int remount); ++int xino_clr(struct super_block *sb); ++struct file *xino_def(struct super_block *sb); ++ ++/* sbinfo.c */ ++struct aufs_sbinfo *stosi(struct super_block *sb); ++aufs_bindex_t sbend(struct super_block *sb); ++struct aufs_branch *stobr(struct super_block *sb, aufs_bindex_t bindex); ++int au_sigen(struct super_block *sb); ++int au_sigen_inc(struct super_block *sb); ++int find_bindex(struct super_block *sb, struct aufs_branch *br); ++ ++void aufs_read_lock(struct dentry *dentry, int flags); ++void aufs_read_unlock(struct dentry *dentry, int flags); ++void aufs_write_lock(struct dentry *dentry); ++void aufs_write_unlock(struct dentry *dentry); 
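[Editorial aside, not part of the patch.] The AuFlag_* values defined above are single bits in sbinfo->si_flags, and the UDBA and COO variants are additionally grouped under masks (AuMask_UDBA, AuMask_COO) so the current mode can be read back with one test; the inline au_flag_set()/au_flag_clr()/au_flag_test() helpers for this appear later in the same header. The following stand-alone sketch mirrors that pattern on a plain unsigned int so it compiles outside the kernel; the SK_* names are illustrative stand-ins, not the real macros.

/* illustrative copies of a few of the bits defined above */
#define SK_XINO          1u
#define SK_UDBA_NONE     (1u << 3)
#define SK_UDBA_REVAL    (1u << 4)
#define SK_UDBA_INOTIFY  (1u << 5)
#define SK_MASK_UDBA     (SK_UDBA_NONE | SK_UDBA_REVAL | SK_UDBA_INOTIFY)

static inline void sk_flag_set(unsigned int *flags, unsigned int f)
{
	*flags |= f;                  /* like au_flag_set()  */
}

static inline void sk_flag_clr(unsigned int *flags, unsigned int f)
{
	*flags &= ~f;                 /* like au_flag_clr()  */
}

static inline unsigned int sk_flag_test(unsigned int flags, unsigned int f)
{
	return flags & f;             /* like au_flag_test() */
}

/* ask which UDBA bit is active by testing against the whole mask,
 * the way au_flag_test_udba() does */
static inline unsigned int sk_udba_mode(unsigned int flags)
{
	return sk_flag_test(flags, SK_MASK_UDBA);
}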
++void aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int isdir); ++void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2); ++ ++aufs_bindex_t new_br_id(struct super_block *sb); ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline const char *au_sbtype(struct super_block *sb) ++{ ++ return sb->s_type->name; ++} ++ ++static inline int au_is_aufs(struct super_block *sb) ++{ ++ return !strcmp(au_sbtype(sb), AUFS_FSTYPE); ++} ++ ++static inline int au_is_nfs(struct super_block *sb) ++{ ++#if defined(CONFIG_NFS_FS) || defined(CONFIG_NFS_FS_MODULE) ++ return !strcmp(au_sbtype(sb), "nfs"); ++#else ++ return 0; ++#endif ++} ++ ++static inline int au_is_remote(struct super_block *sb) ++{ ++ return au_is_nfs(sb); ++} ++ ++#ifdef CONFIG_AUFS_EXPORT ++static inline void au_init_export_op(struct super_block *sb) ++{ ++ extern struct export_operations aufs_export_op; ++ sb->s_export_op = &aufs_export_op; ++} ++ ++static inline int au_is_nfsd(struct task_struct *tsk) ++{ ++ return (!tsk->mm && !strcmp(tsk->comm, "nfsd")); ++} ++ ++static inline void au_nfsd_lockdep_off(void) ++{ ++ /* braces are added to stop a warning */ ++ if (au_is_nfsd(current)) { ++ lockdep_off(); ++ } ++} ++ ++static inline void au_nfsd_lockdep_on(void) ++{ ++ /* braces are added to stop a warning */ ++ if (au_is_nfsd(current)) { ++ lockdep_on(); ++ } ++} ++#else ++static inline int au_is_nfsd(struct task_struct *tsk) ++{ ++ return 0; ++} ++static inline void au_init_export_op(struct super_block *sb) ++{ ++ /* nothing */ ++} ++#define au_nfsd_lockdep_off() /* */ ++#define au_nfsd_lockdep_on() /* */ ++#endif /* CONFIG_AUFS_EXPORT */ ++ ++static inline void init_lvma(struct aufs_sbinfo *sbinfo) ++{ ++#ifdef CONFIG_AUFS_ROBR ++ spin_lock_init(&sbinfo->si_lvma_lock); ++ INIT_LIST_HEAD(&sbinfo->si_lvma); ++#else ++ /* nothing */ ++#endif ++} ++ ++/* limited support before 2.6.18 */ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,18) ++static inline void au_mntget(struct super_block *sb) ++{ ++ mntget(stosi(sb)->si_mnt); ++} ++ ++static inline void au_mntput(struct super_block *sb) ++{ ++ mntput(stosi(sb)->si_mnt); ++} ++#else ++static inline void au_mntget(struct super_block *sb) ++{ ++ /* empty */ ++} ++ ++static inline void au_mntput(struct super_block *sb) ++{ ++ /* empty */ ++} ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline void au_flag_set(struct super_block *sb, unsigned int flag) ++{ ++ //SiMustWriteLock(sb); ++ stosi(sb)->si_flags |= flag; ++} ++ ++static inline void au_flag_clr(struct super_block *sb, unsigned int flag) ++{ ++ //SiMustWriteLock(sb); ++ stosi(sb)->si_flags &= ~flag; ++} ++ ++static inline ++unsigned int au_flag_test(struct super_block *sb, unsigned int flag) ++{ ++ //SiMustAnyLock(sb); ++ return stosi(sb)->si_flags & flag; ++} ++ ++static inline unsigned int au_flag_test_udba(struct super_block *sb) ++{ ++ return au_flag_test(sb, AuMask_UDBA); ++} ++ ++static inline unsigned int au_flag_test_coo(struct super_block *sb) ++{ ++ return au_flag_test(sb, AuMask_COO); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* lock superblock. 
mainly for entry point functions */ ++/* ++ * si_read_lock, si_write_lock, ++ * si_read_unlock, si_write_unlock, si_downgrade_lock ++ */ ++SimpleRwsemFuncs(si, struct super_block *sb, stosi(sb)->si_rwsem); ++ ++/* to debug easier, do not make them inlined functions */ ++#define SiMustReadLock(sb) RwMustReadLock(&stosi(sb)->si_rwsem) ++#define SiMustWriteLock(sb) RwMustWriteLock(&stosi(sb)->si_rwsem) ++#define SiMustAnyLock(sb) RwMustAnyLock(&stosi(sb)->si_rwsem) ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_SUPER_H__ */ +diff --git a/fs/aufs/sysaufs.c b/fs/aufs/sysaufs.c +new file mode 100755 +index 0000000..d686862 +--- /dev/null ++++ b/fs/aufs/sysaufs.c +@@ -0,0 +1,620 @@ ++/* ++ * Copyright (C) 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: sysaufs.c,v 1.6 2007/05/14 03:40:10 sfjro Exp $ */ ++ ++#include <linux/module.h> ++#include <linux/seq_file.h> ++#include <linux/sysfs.h> ++#include "aufs.h" ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* super_blocks list is not exported */ ++static DEFINE_MUTEX(aufs_sbilist_mtx); ++static LIST_HEAD(aufs_sbilist); ++ ++/* ---------------------------------------------------------------------- */ ++ ++typedef ssize_t (*rwfunc_t)(struct kobject *kobj, char *buf, loff_t offset, ++ size_t sz, struct sysaufs_args *args); ++static ssize_t sysaufs_read(struct kobject *kobj, char *buf, loff_t offset, ++ size_t sz, struct sysaufs_args *args); ++static ssize_t sysaufs_free_write(struct kobject *kobj, char *buf, loff_t ++ offset, size_t sz, struct sysaufs_args *args); ++ ++#define GFunc(name, _index, func) \ ++static ssize_t name(struct kobject *kobj, char *buf, loff_t offset, size_t sz) \ ++{ \ ++ struct sysaufs_args args = { \ ++ .index = (_index), \ ++ .mtx = &aufs_sbilist_mtx, \ ++ .sb = NULL \ ++ }; \ ++ return func(kobj, buf, offset, sz, &args); \ ++} ++ ++#define GFuncs(name, _index) \ ++ GFunc(read_##name, _index, sysaufs_read); \ ++ GFunc(write_##name, _index, sysaufs_free_write); ++ ++static struct super_block *find_sb_locked(struct kobject *kobj) ++{ ++ struct super_block *sb; ++ struct aufs_sbinfo *sbinfo; ++ ++ TraceEnter(); ++ MtxMustLock(&aufs_sbilist_mtx); ++ ++ sb = NULL; ++ list_for_each_entry(sbinfo, &aufs_sbilist, si_list) { ++ if (&au_subsys_to_kset(sbinfo->si_sysaufs.subsys).kobj != kobj) ++ continue; ++ sb = sbinfo->si_mnt->mnt_sb; ++ si_read_lock(sb); ++ break; ++ } ++ return sb; ++} ++ ++static ssize_t sb_func(struct kobject *kobj, char *buf, loff_t offset, ++ size_t sz, struct sysaufs_args *args, rwfunc_t func) ++{ ++ ssize_t err; ++ ++ err = -ENOENT; ++ mutex_lock(&aufs_sbilist_mtx); ++ args->sb = find_sb_locked(kobj); ++ if (args->sb) { ++ err = func(kobj, buf, offset, sz, args); ++ si_read_unlock(args->sb); ++ } ++ mutex_unlock(&aufs_sbilist_mtx); ++ return 
err; ++} ++ ++#define SbFunc(name, _index, func) \ ++static ssize_t name(struct kobject *kobj, char *buf, loff_t offset, size_t sz) \ ++{ \ ++ struct sysaufs_args args = { \ ++ .index = (_index), \ ++ .mtx = NULL \ ++ }; \ ++ return sb_func(kobj, buf, offset, sz, &args, func); \ ++} ++ ++#define SbFuncs(name, index) \ ++ SbFunc(read_##name, index, sysaufs_read); \ ++ SbFunc(write_##name, index, sysaufs_free_write) ++ ++static decl_subsys(aufs, NULL, NULL); ++enum {Brs, Stat, Config, _Last}; ++static struct sysaufs_entry g_array[_Last]; ++GFuncs(brs, Brs); ++GFuncs(stat, Stat); ++GFuncs(config, Config); ++ ++SbFuncs(xino, SysaufsSb_XINO); ++ ++#define SetEntry(e, _name, init_size, _ops) \ ++ do { \ ++ (e)->attr.attr.name = #_name; \ ++ (e)->attr.attr.owner = THIS_MODULE; \ ++ (e)->attr.attr.mode = S_IRUGO | S_IWUSR; \ ++ (e)->attr.read = read_##_name; \ ++ (e)->attr.write = write_##_name; \ ++ (e)->allocated = init_size; \ ++ (e)->err = -1; \ ++ (e)->ops = _ops; \ ++ } while (0) ++ ++#define Priv(e) (e)->attr.private ++#define Allocated(e) (e)->allocated ++#define Len(e) (e)->attr.size ++#define Name(e) attr_name((e)->attr) ++ ++/* ---------------------------------------------------------------------- */ ++ ++static void free_entry(struct sysaufs_entry *e) ++{ ++ MtxMustLock(&aufs_sbilist_mtx); ++ DEBUG_ON(!Priv(e)); ++ ++ if (Allocated(e) > 0) ++ kfree(Priv(e)); ++ else ++ free_pages((unsigned long)Priv(e), -Allocated(e)); ++ Priv(e) = NULL; ++ Len(e) = 0; ++} ++ ++static void free_entries(void) ++{ ++ static int a[] = {Brs, -1}; ++ int *p = a; ++ ++ MtxMustLock(&aufs_sbilist_mtx); ++ ++ while (*p >= 0) { ++ if (Priv(g_array + *p)) ++ free_entry(g_array + *p); ++ p++; ++ } ++} ++ ++static int alloc_entry(struct sysaufs_entry *e) ++{ ++ MtxMustLock(&aufs_sbilist_mtx); ++ DEBUG_ON(Priv(e)); ++ //Dbg("%d\n", Allocated(e)); ++ ++ if (Allocated(e) > 0) ++ Priv(e) = kmalloc(Allocated(e), GFP_KERNEL); ++ else ++ Priv(e) = (void*)__get_free_pages(GFP_KERNEL, -Allocated(e)); ++ if (Priv(e)) ++ return 0; ++ return -ENOMEM; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static void unreg(au_subsys_t *subsys, struct sysaufs_entry *a, int n, ++ au_subsys_t *parent) ++{ ++ int i; ++ ++ TraceEnter(); ++ ++ for (i = 0; i < n; i++, a++) ++ if (!a->err) { ++ sysfs_remove_bin_file ++ (&au_subsys_to_kset(*subsys).kobj, &a->attr); ++ if (Priv(a)) ++ free_entry(a); ++ } ++ ++ subsystem_unregister(subsys); ++ subsys_put(parent); ++} ++ ++static int reg(au_subsys_t *subsys, struct sysaufs_entry *a, int n, ++ au_subsys_t *parent) ++{ ++ int err, i; ++ ++ TraceEnter(); ++ ++ subsys_get(parent); ++ kobj_set_kset_s(&au_subsys_to_kset(*subsys), *parent); ++ err = subsystem_register(subsys); ++ if (unlikely(err)) ++ goto out; ++ ++ for (i = 0; !err && i < n; i++) ++ err = a[i].err = sysfs_create_bin_file ++ (&au_subsys_to_kset(*subsys).kobj, &a[i].attr); ++ if (unlikely(err)) ++ unreg(subsys, a, n, parent); ++ ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++#define SbSetEntry(index, name, init_size) \ ++ SetEntry(sa->array + index, name, init_size, au_si_ops); ++ ++void sysaufs_add(struct aufs_sbinfo *sbinfo) ++{ ++ int err; ++ struct sysaufs_sbinfo *sa = &sbinfo->si_sysaufs; ++ ++ TraceEnter(); ++ ++ mutex_lock(&aufs_sbilist_mtx); ++ list_add_tail(&sbinfo->si_list, &aufs_sbilist); ++ free_entries(); ++ ++ memset(sa, 0, sizeof(*sa)); ++ SbSetEntry(SysaufsSb_XINO, xino, 128); ++ err = 
kobject_set_name(&au_subsys_to_kset(sa->subsys).kobj, "%p", ++ sbinfo->si_mnt->mnt_sb); ++ if (!err) ++ err = reg(&sa->subsys, sa->array, ARRAY_SIZE(sa->array), ++ &aufs_subsys); ++ if (unlikely(err)) ++ Warn("failed adding sysfs (%d)\n", err); ++ ++ mutex_unlock(&aufs_sbilist_mtx); ++} ++ ++void sysaufs_del(struct aufs_sbinfo *sbinfo) ++{ ++ struct sysaufs_sbinfo *sa = &sbinfo->si_sysaufs; ++ ++ TraceEnter(); ++ ++ mutex_lock(&aufs_sbilist_mtx); ++ unreg(&sa->subsys, sa->array, ARRAY_SIZE(sa->array), &aufs_subsys); ++ list_del(&sbinfo->si_list); ++ free_entries(); ++ mutex_unlock(&aufs_sbilist_mtx); ++} ++ ++void sysaufs_notify_remount(void) ++{ ++ mutex_lock(&aufs_sbilist_mtx); ++ free_entries(); ++ mutex_unlock(&aufs_sbilist_mtx); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int make_brs(struct seq_file *seq, struct sysaufs_args *args, ++ int *do_size) ++{ ++ int err; ++ struct aufs_sbinfo *sbinfo; ++ ++ TraceEnter(); ++ MtxMustLock(&aufs_sbilist_mtx); ++ DEBUG_ON(args->index != Brs); ++ ++ err = 0; ++ list_for_each_entry(sbinfo, &aufs_sbilist, si_list) { ++ struct super_block *sb; ++ struct dentry *root; ++ struct vfsmount *mnt; ++ ++ sb = sbinfo->si_mnt->mnt_sb; ++ root = sb->s_root; ++ aufs_read_lock(root, !AUFS_I_RLOCK); ++ mnt = sbinfo->si_mnt; ++ err = seq_escape ++ (seq, mnt->mnt_devname ? mnt->mnt_devname : "none", ++ au_esc_chars); ++ if (!err) ++ err = seq_putc(seq, ' '); ++ if (!err) ++ err = seq_path(seq, mnt, root, au_esc_chars); ++ if (err > 0) ++ err = seq_printf(seq, " %p br:", sb); ++ if (!err) ++ err = au_show_brs(seq, sb); ++ aufs_read_unlock(root, !AUFS_I_RLOCK); ++ if (!err) ++ err = seq_putc(seq, '\n'); ++ else ++ break; ++ } ++ ++ TraceErr(err); ++ return err; ++} ++ ++static int make_config(struct seq_file *seq, struct sysaufs_args *args, ++ int *do_size) ++{ ++ int err; ++ ++ TraceEnter(); ++ DEBUG_ON(args->index != Config); ++ ++#ifdef CONFIG_AUFS ++ err = seq_puts(seq, "CONFIG_AUFS=y\n"); ++#else ++ err = seq_puts(seq, "CONFIG_AUFS=m\n"); ++#endif ++ ++#define puts(m, v) \ ++ if (!err) err = seq_puts(seq, "CONFIG_AUFS_" #m "=" #v "\n") ++#define puts_bool(m) puts(m, y) ++#define puts_mod(m) puts(m, m) ++ ++#ifdef CONFIG_AUFS_FAKE_DM ++ puts_bool(FAKE_DM); ++#endif ++#ifdef CONFIG_AUFS_BRANCH_MAX_127 ++ puts_bool(BRANCH_MAX_127); ++#elif defined(CONFIG_AUFS_BRANCH_MAX_511) ++ puts_bool(BRANCH_MAX_511); ++#elif defined(CONFIG_AUFS_BRANCH_MAX_1023) ++ puts_bool(BRANCH_MAX_1023); ++#elif defined(CONFIG_AUFS_BRANCH_MAX_32767) ++ puts_bool(BRANCH_MAX_32767); ++#endif ++ puts_bool(SYSAUFS); ++#ifdef CONFIG_AUFS_HINOTIFY ++ puts_bool(HINOTIFY); ++#endif ++#ifdef CONFIG_AUFS_EXPORT ++ puts_bool(EXPORT); ++#endif ++#ifdef CONFIG_AUFS_ROBR ++ puts_bool(ROBR); ++#endif ++#ifdef CONFIG_AUFS_DLGT ++ puts_bool(DLGT); ++#endif ++#ifdef CONFIG_AUFS_LHASH_PATCH ++ puts_bool(LHASH_PATCH); ++#endif ++#ifdef CONFIG_AUFS_KSIZE_PATCH ++ puts_bool(KSIZE_PATCH); ++#endif ++#ifdef CONFIG_AUFS_DEBUG ++ puts_bool(DEBUG); ++#endif ++#ifdef CONFIG_AUFS_COMPAT ++ puts_bool(COMPAT); ++#endif ++ ++#undef puts_bool ++#undef puts ++ ++ TraceErr(err); ++ return err; ++} ++ ++static int make_stat(struct seq_file *seq, struct sysaufs_args *args, ++ int *do_size) ++{ ++ int err, i; ++ ++ TraceEnter(); ++ DEBUG_ON(args->index != Stat); ++ ++ *do_size = 0; ++ err = seq_puts(seq, "wkq max_busy:"); ++ for (i = 0; !err && i < aufs_nwkq; i++) ++ err = seq_printf(seq, " %u", au_wkq[i].max_busy); ++ if (!err) ++ err = seq_printf(seq, ", %u(generic)\n", 
++ au_wkq[aufs_nwkq].max_busy); ++ TraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int make(struct sysaufs_entry *e, struct sysaufs_args *args, ++ int *do_size) ++ ++{ ++ int err; ++ struct seq_file *seq; ++ ++ TraceEnter(); ++ DEBUG_ON(Priv(e)); ++ MtxMustLock(&aufs_sbilist_mtx); ++ ++ err = -ENOMEM; ++ seq = kzalloc(sizeof(*seq), GFP_KERNEL); ++ if (unlikely(!seq)) ++ goto out; ++ ++ Len(e) = 0; ++ while (1) { ++ err = alloc_entry(e); ++ if (unlikely(err)) ++ break; ++ ++ //mutex_init(&seq.lock); ++ seq->buf = Priv(e); ++ seq->count = 0; ++ seq->size = Allocated(e); ++ if (unlikely(Allocated(e) <= 0)) ++ seq->size = PAGE_SIZE << -Allocated(e); ++ ++ err = e->ops[args->index](seq, args, do_size); ++ if (!err) { ++ Len(e) = seq->count; ++ break; /* success */ ++ } ++ ++ free_entry(e); ++ if (Allocated(e) > 0) { ++ Allocated(e) <<= 1; ++ if (unlikely(Allocated(e) >= (int)PAGE_SIZE)) ++ Allocated(e) = 0; ++ } else ++ Allocated(e)--; ++ //Dbg("%d\n", Allocated(e)); ++ } ++ kfree(seq); ++ ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++/* why does sysfs pass my parent kobject? */ ++static struct dentry *find_me(struct dentry *parent, struct sysaufs_entry *e) ++{ ++#if 1 ++ struct dentry *dentry; ++ const char *name = Name(e); ++ const unsigned int len = strlen(name); ++ ++ //Dbg("%.*s\n", DLNPair(parent)); ++ spin_lock(&dcache_lock); ++ list_for_each_entry(dentry, &parent->d_subdirs, D_CHILD) { ++ //Dbg("%.*s\n", DLNPair(dentry)); ++ if (len == dentry->d_name.len ++ && !strcmp(dentry->d_name.name, name)) { ++ spin_unlock(&dcache_lock); ++ return dentry; ++ } ++ } ++ spin_unlock(&dcache_lock); ++#endif ++ return NULL; ++} ++ ++static ssize_t sysaufs_read(struct kobject *kobj, char *buf, loff_t offset, ++ size_t sz, struct sysaufs_args *args) ++{ ++ ssize_t err; ++ loff_t len; ++ struct dentry *d; ++ struct sysaufs_entry *e; ++ int do_size; ++ ++ LKTRTrace("{%d, %p}, offset %Ld, sz %lu\n", ++ args->index, args->sb, offset, (unsigned long)sz); ++ ++ if (unlikely(!sz)) ++ return 0; ++ ++ err = 0; ++ d = NULL; ++ e = g_array + args->index; ++ if (args->sb) ++ e = stosi(args->sb)->si_sysaufs.array + args->index; ++ ++ do_size = 1; ++ if (args->mtx) ++ mutex_lock(args->mtx); ++ if (unlikely(!Priv(e))) { ++ err = make(e, args, &do_size); ++ DEBUG_ON(Len(e) > INT_MAX); ++ if (do_size) { ++ d = find_me(kobj->dentry, e); ++ if (d) ++ i_size_write(d->d_inode, Len(e)); ++ } ++ } ++ ++ if (!err) { ++ err = len = Len(e) - offset; ++ LKTRTrace("%Ld\n", len); ++ if (len > 0) { ++ if (len > sz) ++ err = sz; ++ memcpy(buf, Priv(e) + offset, err); ++ } ++ ++ if (!do_size) ++ free_entry(e); ++ } ++ if (args->mtx) ++ mutex_unlock(args->mtx); ++ ++ TraceErr(err); ++ return err; ++} ++ ++static ssize_t sysaufs_free_write(struct kobject *kobj, char *buf, ++ loff_t offset, size_t sz, ++ struct sysaufs_args *args) ++{ ++ struct dentry *d; ++ int allocated, len; ++ struct sysaufs_entry *e; ++ ++ LKTRTrace("{%d, %p}\n", args->index, args->sb); ++ ++ e = g_array + args->index; ++ if (args->sb) ++ e = stosi(args->sb)->si_sysaufs.array + args->index; ++ ++ if (args->mtx) ++ mutex_lock(args->mtx); ++ if (Priv(e)) { ++ allocated = Allocated(e); ++ if (unlikely(allocated <= 0)) ++ allocated = PAGE_SIZE << -allocated; ++ allocated >>= 1; ++ len = Len(e); ++ ++ free_entry(e); ++ if (unlikely(len <= allocated)) { ++ if (Allocated(e) >= 0) ++ Allocated(e) = allocated; ++ else ++ Allocated(e)++; ++ } ++ ++ d = find_me(kobj->dentry, e); ++ if (d && 
i_size_read(d->d_inode)) ++ i_size_write(d->d_inode, 0); ++ } ++ if (args->mtx) ++ mutex_unlock(args->mtx); ++ ++ return sz; ++} ++ ++static sysaufs_op g_ops[] = { ++ [Brs] = make_brs, ++ [Stat] = make_stat, ++ [Config] = make_config ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++#define GSetEntry(index, name, init_size) \ ++ SetEntry(g_array + index, name, init_size, g_ops) ++ ++int __init sysaufs_init(void) ++{ ++ int err; ++ ++ GSetEntry(Brs, brs, 128); ++ GSetEntry(Stat, stat, 32); ++ GSetEntry(Config, config, 256); ++ err = reg(&aufs_subsys, g_array, ARRAY_SIZE(g_array), &fs_subsys); ++ TraceErr(err); ++ return err; ++} ++ ++void __exit sysaufs_fin(void) ++{ ++ mutex_lock(&aufs_sbilist_mtx); ++ unreg(&aufs_subsys, g_array, ARRAY_SIZE(g_array), &fs_subsys); ++ mutex_unlock(&aufs_sbilist_mtx); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++#ifdef DbgDlgt ++int is_branch(struct super_block *h_sb) ++{ ++ int found = 0; ++ struct aufs_sbinfo *sbinfo; ++ ++ //Dbg("here\n"); ++ mutex_lock(&aufs_sbilist_mtx); ++ list_for_each_entry(sbinfo, &aufs_sbilist, si_list) { ++ aufs_bindex_t bindex, bend; ++ struct super_block *sb; ++ ++ sb = sbinfo->si_mnt->mnt_sb; ++ si_read_lock(sb); ++ bend = sbend(sb); ++ for (bindex = 0; !found && bindex <= bend; bindex++) ++ found = (h_sb == sbr_sb(sb, bindex)); ++ si_read_unlock(sb); ++ } ++ mutex_unlock(&aufs_sbilist_mtx); ++ return found; ++} ++#endif +diff --git a/fs/aufs/sysaufs.h b/fs/aufs/sysaufs.h +new file mode 100755 +index 0000000..cf0247f +--- /dev/null ++++ b/fs/aufs/sysaufs.h +@@ -0,0 +1,83 @@ ++/* ++ * Copyright (C) 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: sysaufs.h,v 1.3 2007/05/14 06:27:18 sfjro Exp $ */ ++ ++#ifndef __SYSAUFS_H__ ++#define __SYSAUFS_H__ ++ ++#ifdef __KERNEL__ ++ ++#include <linux/seq_file.h> ++#include <linux/sysfs.h> ++#include <linux/version.h> ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) ++typedef struct kset au_subsys_t; ++#define au_subsys_to_kset(subsys) (subsys) ++#else ++typedef struct subsystem au_subsys_t; ++#define au_subsys_to_kset(subsys) ((subsys).kset) ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* arguments for an entry under sysfs */ ++struct sysaufs_args { ++ int index; ++ struct mutex *mtx; ++ struct super_block *sb; ++}; ++ ++typedef int (*sysaufs_op)(struct seq_file *seq, struct sysaufs_args *args, ++ int *do_size); ++ ++/* an entry under sysfs */ ++struct sysaufs_entry { ++ struct bin_attribute attr; ++ int allocated; /* zero minus means pages */ ++ int err; ++ sysaufs_op *ops; ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct aufs_sbinfo; ++#ifdef CONFIG_AUFS_SYSAUFS ++void sysaufs_add(struct aufs_sbinfo *sbinfo); ++void sysaufs_del(struct aufs_sbinfo *sbinfo); ++int __init sysaufs_init(void); ++void sysaufs_fin(void); ++void sysaufs_notify_remount(void); ++#else ++static inline void sysaufs_add(struct aufs_sbinfo *sbinfo) ++{ ++ /* nothing */ ++} ++ ++static inline void sysaufs_del(struct aufs_sbinfo *sbinfo) ++{ ++ /* nothing */ ++} ++#define sysaufs_init() 0 ++#define sysaufs_fin() /* */ ++#define sysaufs_notify_remount() /* */ ++#endif /* CONFIG_AUFS_SYSAUFS */ ++ ++#endif /* __KERNEL__ */ ++#endif /* __SYSAUFS_H__ */ +diff --git a/fs/aufs/vdir.c b/fs/aufs/vdir.c +new file mode 100755 +index 0000000..8e99b7d +--- /dev/null ++++ b/fs/aufs/vdir.c +@@ -0,0 +1,802 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: vdir.c,v 1.22 2007/05/14 03:38:52 sfjro Exp $ */ ++ ++#include "aufs.h" ++ ++static int calc_size(int namelen) ++{ ++ int sz; ++ ++ sz = sizeof(struct aufs_de) + namelen; ++ if (sizeof(ino_t) == sizeof(long)) { ++ const int mask = sizeof(ino_t) - 1; ++ if (sz & mask) { ++ sz += sizeof(ino_t); ++ sz &= ~mask; ++ } ++ } else { ++#if 0 // remove ++ BUG(); ++ // this block will be discarded by optimizer. 
++ int m; ++ m = sz % sizeof(ino_t); ++ if (m) ++ sz += sizeof(ino_t) - m; ++#endif ++ } ++ ++ DEBUG_ON(sz % sizeof(ino_t)); ++ return sz; ++} ++ ++static int set_deblk_end(union aufs_deblk_p *p, union aufs_deblk_p *deblk_end) ++{ ++ if (calc_size(0) <= deblk_end->p - p->p) { ++ p->de->de_str.len = 0; ++ //smp_mb(); ++ return 0; ++ } ++ return -1; // error ++} ++ ++/* returns true or false */ ++static int is_deblk_end(union aufs_deblk_p *p, union aufs_deblk_p *deblk_end) ++{ ++ if (calc_size(0) <= deblk_end->p - p->p) ++ return !p->de->de_str.len; ++ return 1; ++} ++ ++static aufs_deblk_t *last_deblk(struct aufs_vdir *vdir) ++{ ++ return vdir->vd_deblk[vdir->vd_nblk - 1]; ++} ++ ++void nhash_init(struct aufs_nhash *nhash) ++{ ++ int i; ++ for (i = 0; i < AUFS_NHASH_SIZE; i++) ++ INIT_HLIST_HEAD(nhash->heads + i); ++} ++ ++struct aufs_nhash *nhash_new(gfp_t gfp) ++{ ++ struct aufs_nhash *nhash; ++ ++ nhash = kmalloc(sizeof(*nhash), gfp); ++ if (nhash) { ++ nhash_init(nhash); ++ return nhash; ++ } ++ return ERR_PTR(-ENOMEM); ++} ++ ++void nhash_del(struct aufs_nhash *nhash) ++{ ++ nhash_fin(nhash); ++ kfree(nhash); ++} ++ ++void nhash_move(struct aufs_nhash *dst, struct aufs_nhash *src) ++{ ++ int i; ++ ++ TraceEnter(); ++ ++ //DbgWhlist(src); ++ *dst = *src; ++ for (i = 0; i < AUFS_NHASH_SIZE; i++) { ++ struct hlist_head *h; ++ h = dst->heads + i; ++ if (h->first) ++ h->first->pprev = &h->first; ++ INIT_HLIST_HEAD(src->heads + i); ++ } ++ //DbgWhlist(src); ++ //DbgWhlist(dst); ++ //smp_mb(); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++void nhash_fin(struct aufs_nhash *whlist) ++{ ++ int i; ++ struct hlist_head *head; ++ struct aufs_wh *tpos; ++ struct hlist_node *pos, *n; ++ ++ TraceEnter(); ++ ++ for (i = 0; i < AUFS_NHASH_SIZE; i++) { ++ head = whlist->heads + i; ++ hlist_for_each_entry_safe(tpos, pos, n, head, wh_hash) { ++ //hlist_del(pos); ++ kfree(tpos); ++ } ++ } ++} ++ ++int is_longer_wh(struct aufs_nhash *whlist, aufs_bindex_t btgt, int limit) ++{ ++ int n, i; ++ struct hlist_head *head; ++ struct aufs_wh *tpos; ++ struct hlist_node *pos; ++ ++ LKTRTrace("limit %d\n", limit); ++ //return 1; ++ ++ n = 0; ++ for (i = 0; i < AUFS_NHASH_SIZE; i++) { ++ head = whlist->heads + i; ++ hlist_for_each_entry(tpos, pos, head, wh_hash) ++ if (tpos->wh_bindex == btgt && ++n > limit) ++ return 1; ++ } ++ return 0; ++} ++ ++/* returns found(true) or not */ ++int test_known_wh(struct aufs_nhash *whlist, char *name, int namelen) ++{ ++ struct hlist_head *head; ++ struct aufs_wh *tpos; ++ struct hlist_node *pos; ++ struct aufs_destr *str; ++ ++ LKTRTrace("%.*s\n", namelen, name); ++ ++ head = whlist->heads + au_name_hash(name, namelen); ++ hlist_for_each_entry(tpos, pos, head, wh_hash) { ++ str = &tpos->wh_str; ++ LKTRTrace("%.*s\n", str->len, str->name); ++ if (str->len == namelen && !memcmp(str->name, name, namelen)) ++ return 1; ++ } ++ return 0; ++} ++ ++int append_wh(struct aufs_nhash *whlist, char *name, int namelen, ++ aufs_bindex_t bindex) ++{ ++ int err; ++ struct aufs_destr *str; ++ struct aufs_wh *wh; ++ ++ LKTRTrace("%.*s\n", namelen, name); ++ ++ err = -ENOMEM; ++ wh = kmalloc(sizeof(*wh) + namelen, GFP_KERNEL); ++ if (unlikely(!wh)) ++ goto out; ++ err = 0; ++ wh->wh_bindex = bindex; ++ str = &wh->wh_str; ++ str->len = namelen; ++ memcpy(str->name, name, namelen); ++ hlist_add_head(&wh->wh_hash, ++ whlist->heads + au_name_hash(name, namelen)); ++ //smp_mb(); ++ ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++/* 
---------------------------------------------------------------------- */ ++ ++void free_vdir(struct aufs_vdir *vdir) ++{ ++ aufs_deblk_t **deblk; ++ ++ TraceEnter(); ++ ++ deblk = vdir->vd_deblk; ++ while (vdir->vd_nblk--) { ++ kfree(*deblk); ++ deblk++; ++ } ++ kfree(vdir->vd_deblk); ++ cache_free_vdir(vdir); ++} ++ ++static int append_deblk(struct aufs_vdir *vdir) ++{ ++ int err, sz, i; ++ aufs_deblk_t **o; ++ union aufs_deblk_p p, deblk_end; ++ ++ TraceEnter(); ++ ++ err = -ENOMEM; ++ sz = sizeof(*o) * vdir->vd_nblk; ++ o = au_kzrealloc(vdir->vd_deblk, sz, sz + sizeof(*o), GFP_KERNEL); ++ if (unlikely(!o)) ++ goto out; ++ vdir->vd_deblk = o; ++ p.deblk = kmalloc(sizeof(*p.deblk), GFP_KERNEL); ++ if (p.deblk) { ++ i = vdir->vd_nblk++; ++ vdir->vd_deblk[i] = p.deblk; ++ vdir->vd_last.i = i; ++ vdir->vd_last.p.p = p.p; ++ deblk_end.deblk = p.deblk + 1; ++ err = set_deblk_end(&p, &deblk_end); ++ DEBUG_ON(err); ++ } ++ ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++static struct aufs_vdir *alloc_vdir(void) ++{ ++ struct aufs_vdir *vdir; ++ int err; ++ ++ TraceEnter(); ++ ++ err = -ENOMEM; ++ vdir = cache_alloc_vdir(); ++ if (unlikely(!vdir)) ++ goto out; ++ vdir->vd_deblk = kzalloc(sizeof(*vdir->vd_deblk), GFP_KERNEL); ++ if (unlikely(!vdir->vd_deblk)) ++ goto out_free; ++ ++ vdir->vd_nblk = 0; ++ vdir->vd_version = 0; ++ vdir->vd_jiffy = 0; ++ err = append_deblk(vdir); ++ if (!err) ++ return vdir; /* success */ ++ ++ kfree(vdir->vd_deblk); ++ ++ out_free: ++ cache_free_vdir(vdir); ++ out: ++ vdir = ERR_PTR(err); ++ TraceErrPtr(vdir); ++ return vdir; ++} ++ ++static int reinit_vdir(struct aufs_vdir *vdir) ++{ ++ int err; ++ union aufs_deblk_p p, deblk_end; ++ ++ TraceEnter(); ++ ++ while (vdir->vd_nblk > 1) { ++ kfree(vdir->vd_deblk[vdir->vd_nblk - 1]); ++ vdir->vd_deblk[vdir->vd_nblk - 1] = NULL; ++ vdir->vd_nblk--; ++ } ++ p.deblk = vdir->vd_deblk[0]; ++ deblk_end.deblk = p.deblk + 1; ++ err = set_deblk_end(&p, &deblk_end); ++ DEBUG_ON(err); ++ vdir->vd_version = 0; ++ vdir->vd_jiffy = 0; ++ vdir->vd_last.i = 0; ++ vdir->vd_last.p.deblk = vdir->vd_deblk[0]; ++ //smp_mb(); ++ //DbgVdir(vdir); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static void free_dehlist(struct aufs_nhash *dehlist) ++{ ++ int i; ++ struct hlist_head *head; ++ struct aufs_dehstr *tpos; ++ struct hlist_node *pos, *n; ++ ++ TraceEnter(); ++ ++ for (i = 0; i < AUFS_NHASH_SIZE; i++) { ++ head = dehlist->heads + i; ++ hlist_for_each_entry_safe(tpos, pos, n, head, hash) { ++ //hlist_del(pos); ++ cache_free_dehstr(tpos); ++ } ++ } ++} ++ ++/* returns found(true) or not */ ++static int test_known(struct aufs_nhash *delist, char *name, int namelen) ++{ ++ struct hlist_head *head; ++ struct aufs_dehstr *tpos; ++ struct hlist_node *pos; ++ struct aufs_destr *str; ++ ++ LKTRTrace("%.*s\n", namelen, name); ++ ++ head = delist->heads + au_name_hash(name, namelen); ++ hlist_for_each_entry(tpos, pos, head, hash) { ++ str = tpos->str; ++ LKTRTrace("%.*s\n", str->len, str->name); ++ if (str->len == namelen && !memcmp(str->name, name, namelen)) ++ return 1; ++ } ++ return 0; ++ ++} ++ ++static int append_de(struct aufs_vdir *vdir, char *name, int namelen, ino_t ino, ++ unsigned int d_type, struct aufs_nhash *delist) ++{ ++ int err, sz; ++ union aufs_deblk_p p, *room, deblk_end; ++ struct aufs_dehstr *dehstr; ++ ++ LKTRTrace("%.*s %d, i%lu, dt%u\n", namelen, name, namelen, ino, d_type); ++ ++ p.deblk = last_deblk(vdir); ++ deblk_end.deblk = p.deblk + 1; ++ room = 
&vdir->vd_last.p; ++ DEBUG_ON(room->p < p.p || deblk_end.p <= room->p ++ || !is_deblk_end(room, &deblk_end)); ++ ++ sz = calc_size(namelen); ++ if (unlikely(sz > deblk_end.p - room->p)) { ++ err = append_deblk(vdir); ++ if (unlikely(err)) ++ goto out; ++ p.deblk = last_deblk(vdir); ++ deblk_end.deblk = p.deblk + 1; ++ //smp_mb(); ++ DEBUG_ON(room->p != p.p); ++ } ++ ++ err = -ENOMEM; ++ dehstr = cache_alloc_dehstr(); ++ if (unlikely(!dehstr)) ++ goto out; ++ dehstr->str = &room->de->de_str; ++ hlist_add_head(&dehstr->hash, ++ delist->heads + au_name_hash(name, namelen)); ++ ++ room->de->de_ino = ino; ++ room->de->de_type = d_type; ++ room->de->de_str.len = namelen; ++ memcpy(room->de->de_str.name, name, namelen); ++ ++ err = 0; ++ room->p += sz; ++ if (unlikely(set_deblk_end(room, &deblk_end))) ++ err = append_deblk(vdir); ++ //smp_mb(); ++ ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct fillvdir_arg { ++ struct file *file; ++ struct aufs_vdir *vdir; ++ struct aufs_nhash *delist; ++ struct aufs_nhash *whlist; ++ aufs_bindex_t bindex; ++ int err; ++ int called; ++}; ++ ++static int fillvdir(void *__arg, const char *__name, int namelen, loff_t offset, ++ filldir_ino_t h_ino, unsigned int d_type) ++{ ++ struct fillvdir_arg *arg = __arg; ++ char *name = (void*)__name; ++ aufs_bindex_t bindex, bend; ++ struct xino xino; ++ struct super_block *sb; ++ ++ LKTRTrace("%.*s, namelen %d, i%Lu, dt%u\n", ++ namelen, name, namelen, (u64)h_ino, d_type); ++ ++ sb = arg->file->f_dentry->d_sb; ++ bend = arg->bindex; ++ arg->err = 0; ++ arg->called++; ++ //smp_mb(); ++ if (namelen <= AUFS_WH_PFX_LEN ++ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) { ++ for (bindex = 0; bindex < bend; bindex++) ++ if (test_known(arg->delist + bindex, name, namelen) ++ || test_known_wh(arg->whlist + bindex, name, ++ namelen)) ++ goto out; /* already exists or whiteouted */ ++ ++ arg->err = xino_read(sb, bend, h_ino, &xino); ++ if (!arg->err && !xino.ino) { ++ //struct inode *h_inode; ++ xino.ino = xino_new_ino(sb); ++ if (unlikely(!xino.ino)) ++ arg->err = -EIO; ++#if 0 ++ //xino.h_gen = AuXino_INVALID_HGEN; ++ h_inode = ilookup(sbr_sb(sb, bend), h_ino); ++ if (h_inode) { ++ if (!is_bad_inode(h_inode)) { ++ xino.h_gen = h_inode->i_generation; ++ WARN_ON(xino.h_gen == AuXino_INVALID_HGEN); ++ } ++ iput(h_inode); ++ } ++#endif ++ arg->err = xino_write(sb, bend, h_ino, &xino); ++ } ++ if (!arg->err) ++ arg->err = append_de(arg->vdir, name, namelen, xino.ino, ++ d_type, arg->delist + bend); ++ } else { ++ name += AUFS_WH_PFX_LEN; ++ namelen -= AUFS_WH_PFX_LEN; ++ for (bindex = 0; bindex < bend; bindex++) ++ if (test_known_wh(arg->whlist + bend, name, namelen)) ++ goto out; /* already whiteouted */ ++ arg->err = append_wh(arg->whlist + bend, name, namelen, bend); ++ } ++ ++ out: ++ if (!arg->err) ++ arg->vdir->vd_jiffy = jiffies; ++ //smp_mb(); ++ TraceErr(arg->err); ++ return arg->err; ++} ++ ++static int read_vdir(struct file *file, int may_read) ++{ ++ int err, do_read, dlgt; ++ struct dentry *dentry; ++ struct inode *inode; ++ struct aufs_vdir *vdir, *allocated; ++ unsigned long expire; ++ struct fillvdir_arg arg; ++ aufs_bindex_t bindex, bend, bstart; ++ struct super_block *sb; ++ ++ dentry = file->f_dentry; ++ LKTRTrace("%.*s, may %d\n", DLNPair(dentry), may_read); ++ FiMustWriteLock(file); ++ inode = dentry->d_inode; ++ IMustLock(inode); ++ IiMustWriteLock(inode); ++ DEBUG_ON(!S_ISDIR(inode->i_mode)); ++ ++ err = 0; ++ allocated = 
NULL; ++ do_read = 0; ++ sb = inode->i_sb; ++ expire = stosi(sb)->si_rdcache; ++ vdir = ivdir(inode); ++ if (!vdir) { ++ DEBUG_ON(fvdir_cache(file)); ++ do_read = 1; ++ vdir = alloc_vdir(); ++ err = PTR_ERR(vdir); ++ if (IS_ERR(vdir)) ++ goto out; ++ err = 0; ++ allocated = vdir; ++ } else if (may_read ++ && (inode->i_version != vdir->vd_version ++ || time_after(jiffies, vdir->vd_jiffy + expire))) { ++ LKTRTrace("iver %lu, vdver %lu, exp %lu\n", ++ inode->i_version, vdir->vd_version, ++ vdir->vd_jiffy + expire); ++ do_read = 1; ++ err = reinit_vdir(vdir); ++ if (unlikely(err)) ++ goto out; ++ } ++ //DbgVdir(vdir); goto out; ++ ++ if (!do_read) ++ return 0; /* success */ ++ ++ err = -ENOMEM; ++ bend = fbend(file); ++ arg.delist = kmalloc(sizeof(*arg.delist) * (bend + 1), GFP_KERNEL); ++ if (unlikely(!arg.delist)) ++ goto out_vdir; ++ arg.whlist = kmalloc(sizeof(*arg.whlist) * (bend + 1), GFP_KERNEL); ++ if (unlikely(!arg.whlist)) ++ goto out_delist; ++ err = 0; ++ for (bindex = 0; bindex <= bend; bindex++) { ++ nhash_init(arg.delist + bindex); ++ nhash_init(arg.whlist + bindex); ++ } ++ ++ dlgt = need_dlgt(sb); ++ arg.file = file; ++ arg.vdir = vdir; ++ bstart = fbstart(file); ++ for (bindex = bstart; !err && bindex <= bend; bindex++) { ++ struct file *hf; ++ struct inode *h_inode; ++ ++ hf = au_h_fptr_i(file, bindex); ++ if (!hf) ++ continue; ++ ++ h_inode = hf->f_dentry->d_inode; ++ //hf->f_pos = 0; ++ arg.bindex = bindex; ++ do { ++ arg.err = 0; ++ arg.called = 0; ++ //smp_mb(); ++ err = vfsub_readdir(hf, fillvdir, &arg, dlgt); ++ if (err >= 0) ++ err = arg.err; ++ } while (!err && arg.called); ++ } ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ free_dehlist(arg.delist + bindex); ++ nhash_fin(arg.whlist + bindex); ++ } ++ kfree(arg.whlist); ++ ++ out_delist: ++ kfree(arg.delist); ++ out_vdir: ++ if (!err) { ++ //file->f_pos = 0; ++ vdir->vd_version = inode->i_version; ++ vdir->vd_last.i = 0; ++ vdir->vd_last.p.deblk = vdir->vd_deblk[0]; ++ if (allocated) ++ set_ivdir(inode, allocated); ++ } else if (allocated) ++ free_vdir(allocated); ++ //DbgVdir(vdir); goto out; ++ ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++static int copy_vdir(struct aufs_vdir *tgt, struct aufs_vdir *src) ++{ ++ int err, i, rerr, n; ++ ++ TraceEnter(); ++ DEBUG_ON(tgt->vd_nblk != 1); ++ //DbgVdir(tgt); ++ ++ err = -ENOMEM; ++ if (tgt->vd_nblk < src->vd_nblk) { ++ aufs_deblk_t **p; ++ p = au_kzrealloc(tgt->vd_deblk, sizeof(*p) * tgt->vd_nblk, ++ sizeof(*p) * src->vd_nblk, GFP_KERNEL); ++ if (unlikely(!p)) ++ goto out; ++ tgt->vd_deblk = p; ++ } ++ ++ n = tgt->vd_nblk = src->vd_nblk; ++ memcpy(tgt->vd_deblk[0], src->vd_deblk[0], AUFS_DEBLK_SIZE); ++ //tgt->vd_last.i = 0; ++ //tgt->vd_last.p.deblk = tgt->vd_deblk[0]; ++ tgt->vd_version = src->vd_version; ++ tgt->vd_jiffy = src->vd_jiffy; ++ ++ for (i = 1; i < n; i++) { ++ tgt->vd_deblk[i] = kmalloc(AUFS_DEBLK_SIZE, GFP_KERNEL); ++ if (tgt->vd_deblk[i]) ++ memcpy(tgt->vd_deblk[i], src->vd_deblk[i], ++ AUFS_DEBLK_SIZE); ++ else ++ goto out; ++ } ++ //smp_mb(); ++ //DbgVdir(tgt); ++ return 0; /* success */ ++ ++ out: ++ rerr = reinit_vdir(tgt); ++ BUG_ON(rerr); ++ TraceErr(err); ++ return err; ++} ++ ++int au_init_vdir(struct file *file) ++{ ++ int err; ++ struct dentry *dentry; ++ struct inode *inode; ++ struct aufs_vdir *vdir_cache, *allocated; ++ ++ dentry = file->f_dentry; ++ LKTRTrace("%.*s, pos %Ld\n", DLNPair(dentry), file->f_pos); ++ FiMustWriteLock(file); ++ inode = dentry->d_inode; ++ IiMustWriteLock(inode); ++ DEBUG_ON(!S_ISDIR(inode->i_mode)); 
++ ++ err = read_vdir(file, !file->f_pos); ++ if (unlikely(err)) ++ goto out; ++ //DbgVdir(ivdir(inode)); goto out; ++ ++ allocated = NULL; ++ vdir_cache = fvdir_cache(file); ++ if (!vdir_cache) { ++ vdir_cache = alloc_vdir(); ++ err = PTR_ERR(vdir_cache); ++ if (IS_ERR(vdir_cache)) ++ goto out; ++ allocated = vdir_cache; ++ } else if (!file->f_pos && vdir_cache->vd_version != file->f_version) { ++ err = reinit_vdir(vdir_cache); ++ if (unlikely(err)) ++ goto out; ++ } else ++ return 0; /* success */ ++ //err = 0; DbgVdir(vdir_cache); goto out; ++ ++ err = copy_vdir(vdir_cache, ivdir(inode)); ++ if (!err) { ++ file->f_version = inode->i_version; ++ if (allocated) ++ set_fvdir_cache(file, allocated); ++ } else if (allocated) ++ free_vdir(allocated); ++ ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++static loff_t calc_offset(struct aufs_vdir *vdir) ++{ ++ loff_t offset; ++ union aufs_deblk_p p; ++ ++ p.deblk = vdir->vd_deblk[vdir->vd_last.i]; ++ offset = vdir->vd_last.p.p - p.p; ++ offset += sizeof(*p.deblk) * vdir->vd_last.i; ++ return offset; ++} ++ ++/* returns true or false */ ++static int seek_vdir(struct file *file) ++{ ++ int valid, i, n; ++ struct dentry *dentry; ++ struct aufs_vdir *vdir_cache; ++ loff_t offset; ++ union aufs_deblk_p p, deblk_end; ++ ++ dentry = file->f_dentry; ++ LKTRTrace("%.*s, pos %Ld\n", DLNPair(dentry), file->f_pos); ++ vdir_cache = fvdir_cache(file); ++ DEBUG_ON(!vdir_cache); ++ //DbgVdir(vdir_cache); ++ ++ valid = 1; ++ offset = calc_offset(vdir_cache); ++ LKTRTrace("offset %Ld\n", offset); ++ if (file->f_pos == offset) ++ goto out; ++ ++ vdir_cache->vd_last.i = 0; ++ vdir_cache->vd_last.p.deblk = vdir_cache->vd_deblk[0]; ++ if (!file->f_pos) ++ goto out; ++ ++ valid = 0; ++ i = file->f_pos / AUFS_DEBLK_SIZE; ++ LKTRTrace("i %d\n", i); ++ if (i >= vdir_cache->vd_nblk) ++ goto out; ++ ++ n = vdir_cache->vd_nblk; ++ //DbgVdir(vdir_cache); ++ for (; i < n; i++) { ++ p.deblk = vdir_cache->vd_deblk[i]; ++ deblk_end.deblk = p.deblk + 1; ++ offset = i * AUFS_DEBLK_SIZE; ++ while (!is_deblk_end(&p, &deblk_end) && offset < file->f_pos) { ++ int l; ++ l = calc_size(p.de->de_str.len); ++ offset += l; ++ p.p += l; ++ } ++ if (!is_deblk_end(&p, &deblk_end)) { ++ valid = 1; ++ vdir_cache->vd_last.i = i; ++ vdir_cache->vd_last.p = p; ++ break; ++ } ++ } ++ ++ out: ++ //smp_mb(); ++ //DbgVdir(vdir_cache); ++ TraceErr(!valid); ++ return valid; ++} ++ ++int au_fill_de(struct file *file, void *dirent, filldir_t filldir) ++{ ++ int err, l; ++ struct dentry *dentry; ++ struct aufs_vdir *vdir_cache; ++ struct aufs_de *de; ++ union aufs_deblk_p deblk_end; ++ ++ dentry = file->f_dentry; ++ LKTRTrace("%.*s, pos %Ld\n", DLNPair(dentry), file->f_pos); ++ vdir_cache = fvdir_cache(file); ++ DEBUG_ON(!vdir_cache); ++ //DbgVdir(vdir_cache); ++ ++ if (!seek_vdir(file)) ++ return 0; ++ ++ while (1) { ++ deblk_end.deblk ++ = vdir_cache->vd_deblk[vdir_cache->vd_last.i] + 1; ++ while (!is_deblk_end(&vdir_cache->vd_last.p, &deblk_end)) { ++ de = vdir_cache->vd_last.p.de; ++ LKTRTrace("%.*s, off%Ld, i%lu, dt%d\n", ++ de->de_str.len, de->de_str.name, ++ file->f_pos, de->de_ino, de->de_type); ++ err = filldir(dirent, de->de_str.name, de->de_str.len, ++ file->f_pos, de->de_ino, de->de_type); ++ if (unlikely(err)) { ++ TraceErr(err); ++ //return err; ++ //todo: ignore the error caused by udba. 
++ return 0; ++ } ++ ++ l = calc_size(de->de_str.len); ++ vdir_cache->vd_last.p.p += l; ++ file->f_pos += l; ++ } ++ if (vdir_cache->vd_last.i < vdir_cache->vd_nblk - 1) { ++ vdir_cache->vd_last.i++; ++ vdir_cache->vd_last.p.deblk ++ = vdir_cache->vd_deblk[vdir_cache->vd_last.i]; ++ file->f_pos = sizeof(*vdir_cache->vd_last.p.deblk) ++ * vdir_cache->vd_last.i; ++ continue; ++ } ++ break; ++ } ++ ++ //smp_mb(); ++ return 0; ++} +diff --git a/fs/aufs/vfsub.c b/fs/aufs/vfsub.c +new file mode 100755 +index 0000000..8571d21 +--- /dev/null ++++ b/fs/aufs/vfsub.c +@@ -0,0 +1,665 @@ ++/* ++ * Copyright (C) 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: vfsub.c,v 1.5 2007/04/23 00:55:06 sfjro Exp $ */ ++// I'm going to slightly mad ++ ++#include "aufs.h" ++ ++/* ---------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_AUFS_DLGT ++struct permission_args { ++ int *errp; ++ struct inode *inode; ++ int mask; ++ struct nameidata *nd; ++}; ++ ++static void call_permission(void *args) ++{ ++ struct permission_args *a = args; ++ *a->errp = do_vfsub_permission(a->inode, a->mask, a->nd); ++} ++ ++int vfsub_permission(struct inode *inode, int mask, struct nameidata *nd, ++ int dlgt) ++{ ++ if (!dlgt) ++ return do_vfsub_permission(inode, mask, nd); ++ else { ++ int err; ++ struct permission_args args = { ++ .errp = &err, ++ .inode = inode, ++ .mask = mask, ++ .nd = nd ++ }; ++ au_wkq_wait(call_permission, &args, /*dlgt*/1); ++ return err; ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct create_args { ++ int *errp; ++ struct inode *dir; ++ struct dentry *dentry; ++ int mode; ++ struct nameidata *nd; ++}; ++ ++static void call_create(void *args) ++{ ++ struct create_args *a = args; ++ *a->errp = do_vfsub_create(a->dir, a->dentry, a->mode, a->nd); ++} ++ ++int vfsub_create(struct inode *dir, struct dentry *dentry, int mode, ++ struct nameidata *nd, int dlgt) ++{ ++ if (!dlgt) ++ return do_vfsub_create(dir, dentry, mode, nd); ++ else { ++ int err; ++ struct create_args args = { ++ .errp = &err, ++ .dir = dir, ++ .dentry = dentry, ++ .mode = mode, ++ .nd = nd ++ }; ++ au_wkq_wait(call_create, &args, /*dlgt*/1); ++ return err; ++ } ++} ++ ++struct symlink_args { ++ int *errp; ++ struct inode *dir; ++ struct dentry *dentry; ++ const char *symname; ++ int mode; ++}; ++ ++static void call_symlink(void *args) ++{ ++ struct symlink_args *a = args; ++ *a->errp = do_vfsub_symlink(a->dir, a->dentry, a->symname, a->mode); ++} ++ ++int vfsub_symlink(struct inode *dir, struct dentry *dentry, const char *symname, ++ int mode, int dlgt) ++{ ++ if (!dlgt) ++ return do_vfsub_symlink(dir, dentry, symname, mode); ++ else { ++ int err; ++ struct symlink_args args = { ++ .errp = &err, ++ .dir = dir, ++ .dentry = dentry, 
++ .symname = symname, ++ .mode = mode ++ }; ++ au_wkq_wait(call_symlink, &args, /*dlgt*/1); ++ return err; ++ } ++} ++ ++struct mknod_args { ++ int *errp; ++ struct inode *dir; ++ struct dentry *dentry; ++ int mode; ++ dev_t dev; ++}; ++ ++static void call_mknod(void *args) ++{ ++ struct mknod_args *a = args; ++ *a->errp = do_vfsub_mknod(a->dir, a->dentry, a->mode, a->dev); ++} ++ ++int vfsub_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev, ++ int dlgt) ++{ ++ if (!dlgt) ++ return do_vfsub_mknod(dir, dentry, mode, dev); ++ else { ++ int err; ++ struct mknod_args args = { ++ .errp = &err, ++ .dir = dir, ++ .dentry = dentry, ++ .mode = mode, ++ .dev = dev ++ }; ++ au_wkq_wait(call_mknod, &args, /*dlgt*/1); ++ return err; ++ } ++} ++ ++struct mkdir_args { ++ int *errp; ++ struct inode *dir; ++ struct dentry *dentry; ++ int mode; ++}; ++ ++static void call_mkdir(void *args) ++{ ++ struct mkdir_args *a = args; ++ *a->errp = do_vfsub_mkdir(a->dir, a->dentry, a->mode); ++} ++ ++int vfsub_mkdir(struct inode *dir, struct dentry *dentry, int mode, int dlgt) ++{ ++ if (!dlgt) ++ return do_vfsub_mkdir(dir, dentry, mode); ++ else { ++ int err; ++ struct mkdir_args args = { ++ .errp = &err, ++ .dir = dir, ++ .dentry = dentry, ++ .mode = mode ++ }; ++ au_wkq_wait(call_mkdir, &args, /*dlgt*/1); ++ return err; ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct link_args { ++ int *errp; ++ struct inode *dir; ++ struct dentry *src_dentry, *dentry; ++}; ++ ++static void call_link(void *args) ++{ ++ struct link_args *a = args; ++ *a->errp = do_vfsub_link(a->src_dentry, a->dir, a->dentry); ++} ++ ++int vfsub_link(struct dentry *src_dentry, struct inode *dir, ++ struct dentry *dentry, int dlgt) ++{ ++ if (!dlgt) ++ return do_vfsub_link(src_dentry, dir, dentry); ++ else { ++ int err; ++ struct link_args args = { ++ .errp = &err, ++ .src_dentry = src_dentry, ++ .dir = dir, ++ .dentry = dentry ++ }; ++ au_wkq_wait(call_link, &args, /*dlgt*/1); ++ return err; ++ } ++} ++ ++struct rename_args { ++ int *errp; ++ struct inode *src_dir, *dir; ++ struct dentry *src_dentry, *dentry; ++}; ++ ++static void call_rename(void *args) ++{ ++ struct rename_args *a = args; ++ *a->errp = do_vfsub_rename(a->src_dir, a->src_dentry, a->dir, ++ a->dentry); ++} ++ ++int vfsub_rename(struct inode *src_dir, struct dentry *src_dentry, ++ struct inode *dir, struct dentry *dentry, int dlgt) ++{ ++ if (!dlgt) ++ return do_vfsub_rename(src_dir, src_dentry, dir, dentry); ++ else { ++ int err; ++ struct rename_args args = { ++ .errp = &err, ++ .src_dir = src_dir, ++ .src_dentry = src_dentry, ++ .dir = dir, ++ .dentry = dentry ++ }; ++ au_wkq_wait(call_rename, &args, /*dlgt*/1); ++ return err; ++ } ++} ++ ++struct rmdir_args { ++ int *errp; ++ struct inode *dir; ++ struct dentry *dentry; ++}; ++ ++static void call_rmdir(void *args) ++{ ++ struct rmdir_args *a = args; ++ *a->errp = do_vfsub_rmdir(a->dir, a->dentry); ++} ++ ++int vfsub_rmdir(struct inode *dir, struct dentry *dentry, int dlgt) ++{ ++ if (!dlgt) ++ return do_vfsub_rmdir(dir, dentry); ++ else { ++ int err; ++ struct rmdir_args args = { ++ .errp = &err, ++ .dir = dir, ++ .dentry = dentry ++ }; ++ au_wkq_wait(call_rmdir, &args, /*dlgt*/1); ++ return err; ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct read_args { ++ ssize_t *errp; ++ struct file *file; ++ union { ++ void *kbuf; ++ char __user *ubuf; ++ }; ++ size_t count; ++ loff_t *ppos; ++}; ++ ++static void 
call_read_k(void *args) ++{ ++ struct read_args *a = args; ++ LKTRTrace("%.*s, cnt %lu, pos %Ld\n", ++ DLNPair(a->file->f_dentry), (unsigned long)a->count, ++ *a->ppos); ++ *a->errp = do_vfsub_read_k(a->file, a->kbuf, a->count, a->ppos); ++} ++ ++ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count, ++ loff_t *ppos, int dlgt) ++{ ++ if (!dlgt) ++ return do_vfsub_read_u(file, ubuf, count, ppos); ++ else { ++ ssize_t err, read; ++ struct read_args args = { ++ .errp = &err, ++ .file = file, ++ .count = count, ++ .ppos = ppos ++ }; ++ ++ if (unlikely(!count)) ++ return 0; ++ ++ /* ++ * workaround an application bug. ++ * generally, read(2) or write(2) may return the value shorter ++ * than requested. But many applications don't support it, ++ * for example bash. ++ */ ++ err = -ENOMEM; ++ if (args.count > PAGE_SIZE) ++ args.count = PAGE_SIZE; ++ args.kbuf = kmalloc(args.count, GFP_KERNEL); ++ if (unlikely(!args.kbuf)) ++ goto out; ++ ++ read = 0; ++ do { ++ au_wkq_wait(call_read_k, &args, /*dlgt*/1); ++ if (unlikely(err > 0 ++ && copy_to_user(ubuf, args.kbuf, err))) { ++ err = -EFAULT; ++ goto out_free; ++ } else if (!err) ++ break; ++ else if (unlikely(err < 0)) ++ goto out_free; ++ count -= err; ++ /* do not read too much because of file i/o pointer */ ++ if (unlikely(count < args.count)) ++ args.count = count; ++ ubuf += err; ++ read += err; ++ } while (count); ++ smp_mb(); ++ err = read; ++ ++ out_free: ++ kfree(args.kbuf); ++ out: ++ return err; ++ } ++} ++ ++ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count, loff_t *ppos, ++ int dlgt) ++{ ++ if (!dlgt) ++ return do_vfsub_read_k(file, kbuf, count, ppos); ++ else { ++ ssize_t err; ++ struct read_args args = { ++ .errp = &err, ++ .file = file, ++ .count = count, ++ .ppos = ppos ++ }; ++ args.kbuf = kbuf; ++ au_wkq_wait(call_read_k, &args, /*dlgt*/1); ++ return err; ++ } ++} ++ ++struct write_args { ++ ssize_t *errp; ++ struct file *file; ++ union { ++ void *kbuf; ++ const char __user *ubuf; ++ }; ++ void *buf; ++ size_t count; ++ loff_t *ppos; ++}; ++ ++static void call_write_k(void *args) ++{ ++ struct write_args *a = args; ++ LKTRTrace("%.*s, cnt %lu, pos %Ld\n", ++ DLNPair(a->file->f_dentry), (unsigned long)a->count, ++ *a->ppos); ++ *a->errp = do_vfsub_write_k(a->file, a->kbuf, a->count, a->ppos); ++} ++ ++ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count, ++ loff_t *ppos, int dlgt) ++{ ++ if (!dlgt) ++ return do_vfsub_write_u(file, ubuf, count, ppos); ++ else { ++ ssize_t err, written; ++ struct write_args args = { ++ .errp = &err, ++ .file = file, ++ .count = count, ++ .ppos = ppos ++ }; ++ ++ if (unlikely(!count)) ++ return 0; ++ ++ /* ++ * workaround an application bug. ++ * generally, read(2) or write(2) may return the value shorter ++ * than requested. But many applications don't support it, ++ * for example bash. 
++ */ ++ err = -ENOMEM; ++ if (args.count > PAGE_SIZE) ++ args.count = PAGE_SIZE; ++ args.kbuf = kmalloc(args.count, GFP_KERNEL); ++ if (unlikely(!args.kbuf)) ++ goto out; ++ ++ written = 0; ++ do { ++ if (unlikely(copy_from_user(args.kbuf, ubuf, args.count))) { ++ err = -EFAULT; ++ goto out_free; ++ } ++ ++ au_wkq_wait(call_write_k, &args, /*dlgt*/1); ++ if (err > 0) { ++ count -= err; ++ if (count < args.count) ++ args.count = count; ++ ubuf += err; ++ written += err; ++ } else if (!err) ++ break; ++ else if (unlikely(err < 0)) ++ goto out_free; ++ } while (count); ++ err = written; ++ ++ out_free: ++ kfree(args.kbuf); ++ out: ++ return err; ++ } ++} ++ ++ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, loff_t *ppos, ++ int dlgt) ++{ ++ if (!dlgt) ++ return do_vfsub_write_k(file, kbuf, count, ppos); ++ else { ++ ssize_t err; ++ struct write_args args = { ++ .errp = &err, ++ .file = file, ++ .count = count, ++ .ppos = ppos ++ }; ++ args.kbuf = kbuf; ++ au_wkq_wait(call_write_k, &args, /*dlgt*/1); ++ return err; ++ } ++} ++ ++struct readdir_args { ++ int *errp; ++ struct file *file; ++ filldir_t filldir; ++ void *arg; ++}; ++ ++static void call_readdir(void *args) ++{ ++ struct readdir_args *a = args; ++ *a->errp = do_vfsub_readdir(a->file, a->filldir, a->arg); ++} ++ ++int vfsub_readdir(struct file *file, filldir_t filldir, void *arg, int dlgt) ++{ ++ if (!dlgt) ++ return do_vfsub_readdir(file, filldir, arg); ++ else { ++ int err; ++ struct readdir_args args = { ++ .errp = &err, ++ .file = file, ++ .filldir = filldir, ++ .arg = arg ++ }; ++ au_wkq_wait(call_readdir, &args, /*dlgt*/1); ++ return err; ++ } ++} ++#endif /* CONFIG_AUFS_DLGT */ ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct notify_change_args { ++ int *errp; ++ struct dentry *h_dentry; ++ struct iattr *ia; ++}; ++ ++static void call_notify_change(void *args) ++{ ++ struct notify_change_args *a = args; ++ struct inode *h_inode; ++ ++ LKTRTrace("%.*s, ia_valid 0x%x\n", ++ DLNPair(a->h_dentry), a->ia->ia_valid); ++ h_inode = a->h_dentry->d_inode; ++ IMustLock(h_inode); ++ ++ *a->errp = -EPERM; ++ if (!IS_IMMUTABLE(h_inode) && !IS_APPEND(h_inode)) { ++ lockdep_off(); ++ *a->errp = notify_change(a->h_dentry, a->ia); ++ lockdep_on(); ++ } ++ TraceErr(*a->errp); ++} ++ ++int vfsub_notify_change(struct dentry *dentry, struct iattr *ia, int dlgt) ++{ ++ int err; ++ struct notify_change_args args = { ++ .errp = &err, ++ .h_dentry = dentry, ++ .ia = ia ++ }; ++ ++#ifndef CONFIG_AUFS_DLGT ++ call_notify_change(&args); ++#else ++ if (!dlgt) ++ call_notify_change(&args); ++ else ++ au_wkq_wait(call_notify_change, &args, /*dlgt*/1); ++#endif ++ ++ TraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct unlink_args { ++ int *errp; ++ struct inode *dir; ++ struct dentry *dentry; ++}; ++ ++static void call_unlink(void *args) ++{ ++ struct unlink_args *a = args; ++ struct inode *h_inode; ++ const int stop_sillyrename = (au_is_nfs(a->dentry->d_sb) ++ && atomic_read(&a->dentry->d_count) == 1); ++ ++ LKTRTrace("%.*s, stop_silly %d, cnt %d\n", ++ DLNPair(a->dentry), stop_sillyrename, ++ atomic_read(&a->dentry->d_count)); ++ IMustLock(a->dir); ++ ++ if (!stop_sillyrename) ++ dget(a->dentry); ++ h_inode = a->dentry->d_inode; ++ if (h_inode) ++ atomic_inc(&h_inode->i_count); ++#if 0 // partial testing ++ { ++ struct qstr *name = &a->dentry->d_name; ++ if (name->len == sizeof(AUFS_XINO_FNAME) - 1 ++ && 
!strncmp(name->name, AUFS_XINO_FNAME, name->len)) { ++ lockdep_off(); ++ *a->errp = vfs_unlink(a->dir, a->dentry); ++ lockdep_on(); ++ } else ++ err = -1; ++ } ++#else ++ // vfs_unlink() locks inode ++ lockdep_off(); ++ *a->errp = vfs_unlink(a->dir, a->dentry); ++ lockdep_on(); ++#endif ++ ++ if (!stop_sillyrename) ++ dput(a->dentry); ++ if (h_inode) ++ iput(h_inode); ++ ++ TraceErr(*a->errp); ++} ++ ++/* ++ * @dir: must be locked. ++ * @dentry: target dentry. ++ */ ++int vfsub_unlink(struct inode *dir, struct dentry *dentry, int dlgt) ++{ ++ int err; ++ struct unlink_args args = { ++ .errp = &err, ++ .dir = dir, ++ .dentry = dentry ++ }; ++ ++#ifndef CONFIG_AUFS_DLGT ++ call_unlink(&args); ++#else ++ if (!dlgt) ++ call_unlink(&args); ++ else ++ au_wkq_wait(call_unlink, &args, /*dlgt*/1); ++#endif ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct statfs_args { ++ int *errp; ++ void *arg; ++ struct kstatfs *buf; ++}; ++ ++static void call_statfs(void *args) ++{ ++ struct statfs_args *a = args; ++ *a->errp = vfs_statfs(a->arg, a->buf); ++} ++ ++int vfsub_statfs(void *arg, struct kstatfs *buf, int dlgt) ++{ ++ int err; ++ struct statfs_args args = { ++ .errp = &err, ++ .arg = arg, ++ .buf = buf ++ }; ++ ++#ifndef CONFIG_AUFS_DLGT ++ call_statfs(&args); ++#else ++ if (!dlgt) ++ call_statfs(&args); ++ else ++ au_wkq_wait(call_statfs, &args, /*dlgt*/1); ++#endif ++ return err; ++} +diff --git a/fs/aufs/vfsub.h b/fs/aufs/vfsub.h +new file mode 100755 +index 0000000..52f15cc +--- /dev/null ++++ b/fs/aufs/vfsub.h +@@ -0,0 +1,427 @@ ++/* ++ * Copyright (C) 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: vfsub.h,v 1.8 2007/05/14 03:39:10 sfjro Exp $ */ ++ ++#ifndef __AUFS_VFSUB_H__ ++#define __AUFS_VFSUB_H__ ++ ++#ifdef __KERNEL__ ++ ++#include <linux/fs.h> ++#include <asm/uaccess.h> ++#include "wkq.h" ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* simple abstractions, for future use */ ++static inline ++int do_vfsub_permission(struct inode *inode, int mask, struct nameidata *nd) ++{ ++ LKTRTrace("i%lu, mask 0x%x, nd %p\n", inode->i_ino, mask, nd); ++#if 0 ++#else ++ return permission(inode, mask, nd); ++#endif ++} ++ ++static inline ++struct file *vfsub_filp_open(const char *path, int oflags, int mode) ++{ ++ struct file *err; ++ ++ LKTRTrace("%s\n", path); ++ ++ lockdep_off(); ++ err = filp_open(path, oflags, mode); ++ lockdep_on(); ++ return err; ++} ++ ++static inline ++int vfsub_path_lookup(const char *name, unsigned int flags, ++ struct nameidata *nd) ++{ ++ int err; ++ ++ LKTRTrace("%s\n", name); ++ ++ //lockdep_off(); ++ err = path_lookup(name, flags, nd); ++ //lockdep_on(); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline ++int do_vfsub_create(struct inode *dir, struct dentry *dentry, int mode, ++ struct nameidata *nd) ++{ ++ LKTRTrace("i%lu, %.*s, 0x%x\n", dir->i_ino, DLNPair(dentry), mode); ++#if 0 ++#else ++ return vfs_create(dir, dentry, mode, nd); ++#endif ++} ++ ++static inline ++int do_vfsub_symlink(struct inode *dir, struct dentry *dentry, ++ const char *symname, int mode) ++{ ++ LKTRTrace("i%lu, %.*s, %s, 0x%x\n", ++ dir->i_ino, DLNPair(dentry), symname, mode); ++#if 0 ++#else ++ return vfs_symlink(dir, dentry, symname, mode); ++#endif ++} ++ ++static inline ++int do_vfsub_mknod(struct inode *dir, struct dentry *dentry, int mode, ++ dev_t dev) ++{ ++ LKTRTrace("i%lu, %.*s, 0x%x\n", dir->i_ino, DLNPair(dentry), mode); ++#if 0 ++#else ++ return vfs_mknod(dir, dentry, mode, dev); ++#endif ++} ++ ++static inline ++int do_vfsub_link(struct dentry *src_dentry, struct inode *dir, ++ struct dentry *dentry) ++{ ++ int err; ++ ++ LKTRTrace("%.*s, i%lu, %.*s\n", ++ DLNPair(src_dentry), dir->i_ino, DLNPair(dentry)); ++ ++ lockdep_off(); ++#if 0 ++#else ++ err = vfs_link(src_dentry, dir, dentry); ++#endif ++ lockdep_on(); ++ return err; ++} ++ ++static inline ++int do_vfsub_rename(struct inode *src_dir, struct dentry *src_dentry, ++ struct inode *dir, struct dentry *dentry) ++{ ++ int err; ++ ++ LKTRTrace("i%lu, %.*s, i%lu, %.*s\n", ++ src_dir->i_ino, DLNPair(src_dentry), ++ dir->i_ino, DLNPair(dentry)); ++ ++ lockdep_off(); ++#if 0 ++#else ++ err = vfs_rename(src_dir, src_dentry, dir, dentry); ++#endif ++ lockdep_on(); ++ return err; ++} ++ ++static inline ++int do_vfsub_mkdir(struct inode *dir, struct dentry *dentry, int mode) ++{ ++ LKTRTrace("i%lu, %.*s, 0x%x\n", dir->i_ino, DLNPair(dentry), mode); ++#if 0 ++#else ++ return vfs_mkdir(dir, dentry, mode); ++#endif ++} ++ ++static inline int do_vfsub_rmdir(struct inode *dir, struct dentry *dentry) ++{ ++ int err; ++ ++ LKTRTrace("i%lu, %.*s\n", dir->i_ino, DLNPair(dentry)); ++ ++ lockdep_off(); ++#if 0 ++#else ++ err = vfs_rmdir(dir, dentry); ++#endif ++ lockdep_on(); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline 
++ssize_t do_vfsub_read_u(struct file *file, char __user *ubuf, size_t count, ++ loff_t *ppos) ++{ ++ ssize_t err; ++ ++ LKTRTrace("%.*s, cnt %lu, pos %Ld\n", ++ DLNPair(file->f_dentry), (unsigned long)count, *ppos); ++ ++ /* nfs uses some locks */ ++ lockdep_off(); ++#if 0 ++#else ++ err = vfs_read(file, ubuf, count, ppos); ++#endif ++ lockdep_on(); ++ return err; ++} ++ ++// kernel_read() ?? ++static inline ++ssize_t do_vfsub_read_k(struct file *file, void *kbuf, size_t count, ++ loff_t *ppos) ++{ ++ ssize_t err; ++ mm_segment_t oldfs; ++ ++ oldfs = get_fs(); ++ set_fs(KERNEL_DS); ++ err = do_vfsub_read_u(file, (char __user*)kbuf, count, ppos); ++ set_fs(oldfs); ++ return err; ++} ++ ++static inline ++ssize_t do_vfsub_write_u(struct file *file, const char __user *ubuf, ++ size_t count, loff_t *ppos) ++{ ++ ssize_t err; ++ ++ LKTRTrace("%.*s, cnt %lu, pos %Ld\n", ++ DLNPair(file->f_dentry), (unsigned long)count, *ppos); ++ ++ lockdep_off(); ++#if 0 ++#else ++ err = vfs_write(file, ubuf, count, ppos); ++#endif ++ lockdep_on(); ++ return err; ++} ++ ++static inline ++ssize_t do_vfsub_write_k(struct file *file, void *kbuf, size_t count, ++ loff_t *ppos) ++{ ++ ssize_t err; ++ mm_segment_t oldfs; ++ ++ oldfs = get_fs(); ++ set_fs(KERNEL_DS); ++ err = do_vfsub_write_u(file, (const char __user*)kbuf, count, ppos); ++ set_fs(oldfs); ++ return err; ++} ++ ++static inline ++int do_vfsub_readdir(struct file *file, filldir_t filldir, void *arg) ++{ ++ int err; ++ ++ LKTRTrace("%.*s\n", DLNPair(file->f_dentry)); ++ ++ lockdep_off(); ++#if 0 ++#else ++ err = vfs_readdir(file, filldir, arg); ++#endif ++ lockdep_on(); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline loff_t vfsub_llseek(struct file *file, loff_t offset, int origin) ++{ ++ loff_t err; ++ ++ LKTRTrace("%.*s\n", DLNPair(file->f_dentry)); ++ ++ lockdep_off(); ++#if 0 ++#else ++ err = vfs_llseek(file, offset, origin); ++#endif ++ lockdep_on(); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_AUFS_DLGT ++static inline int need_dlgt(struct super_block *sb) ++{ ++ return (au_flag_test(sb, AuFlag_DLGT) && !is_au_wkq(current)); ++} ++ ++int vfsub_permission(struct inode *inode, int mask, struct nameidata *nd, ++ int dlgt); ++ ++int vfsub_create(struct inode *dir, struct dentry *dentry, int mode, ++ struct nameidata *nd, int dlgt); ++int vfsub_symlink(struct inode *dir, struct dentry *dentry, const char *symname, ++ int mode, int dlgt); ++int vfsub_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev, ++ int dlgt); ++int vfsub_link(struct dentry *src_dentry, struct inode *dir, ++ struct dentry *dentry, int dlgt); ++int vfsub_rename(struct inode *src_dir, struct dentry *src_dentry, ++ struct inode *dir, struct dentry *dentry, int dlgt); ++int vfsub_mkdir(struct inode *dir, struct dentry *dentry, int mode, int dlgt); ++int vfsub_rmdir(struct inode *dir, struct dentry *dentry, int dlgt); ++ ++ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count, ++ loff_t *ppos, int dlgt); ++ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count, loff_t *ppos, ++ int dlgt); ++ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count, ++ loff_t *ppos, int dlgt); ++ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, loff_t *ppos, ++ int dlgt); ++int vfsub_readdir(struct file *file, filldir_t filldir, void *arg, int dlgt); ++ ++#else ++ ++static inline 
int need_dlgt(struct super_block *sb) ++{ ++ return 0; ++} ++ ++static inline ++int vfsub_permission(struct inode *inode, int mask, struct nameidata *nd, ++ int dlgt) ++{ ++ return do_vfsub_permission(inode, mask, nd); ++} ++ ++static inline ++int vfsub_create(struct inode *dir, struct dentry *dentry, int mode, ++ struct nameidata *nd, int dlgt) ++{ ++ return do_vfsub_create(dir, dentry, mode, nd); ++} ++ ++static inline ++int vfsub_symlink(struct inode *dir, struct dentry *dentry, const char *symname, ++ int mode, int dlgt) ++{ ++ return do_vfsub_symlink(dir, dentry, symname, mode); ++} ++ ++static inline ++int vfsub_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev, ++ int dlgt) ++{ ++ return do_vfsub_mknod(dir, dentry, mode, dev); ++} ++ ++static inline ++int vfsub_link(struct dentry *src_dentry, struct inode *dir, ++ struct dentry *dentry, int dlgt) ++{ ++ return do_vfsub_link(src_dentry, dir, dentry); ++} ++ ++static inline ++int vfsub_rename(struct inode *src_dir, struct dentry *src_dentry, ++ struct inode *dir, struct dentry *dentry, int dlgt) ++{ ++ return do_vfsub_rename(src_dir, src_dentry, dir, dentry); ++} ++ ++static inline ++int vfsub_mkdir(struct inode *dir, struct dentry *dentry, int mode, ++ int dlgt) ++{ ++ return do_vfsub_mkdir(dir, dentry, mode); ++} ++ ++static inline ++int vfsub_rmdir(struct inode *dir, struct dentry *dentry, int dlgt) ++{ ++ return do_vfsub_rmdir(dir, dentry); ++} ++ ++static inline ++ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count, ++ loff_t *ppos, int dlgt) ++{ ++ return do_vfsub_read_u(file, ubuf, count, ppos); ++} ++ ++static inline ++ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count, loff_t *ppos, ++ int dlgt) ++{ ++ return do_vfsub_read_k(file, kbuf, count, ppos); ++} ++ ++static inline ++ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count, ++ loff_t *ppos, int dlgt) ++{ ++ return do_vfsub_write_u(file, ubuf, count, ppos); ++} ++ ++static inline ++ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, loff_t *ppos, ++ int dlgt) ++{ ++ return do_vfsub_write_k(file, kbuf, count, ppos); ++} ++ ++static inline ++int vfsub_readdir(struct file *file, filldir_t filldir, void *arg, int dlgt) ++{ ++ return do_vfsub_readdir(file, filldir, arg); ++} ++#endif /* CONFIG_AUFS_DLGT */ ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline ++struct dentry *vfsub_lock_rename(struct dentry *d1, struct dentry *d2) ++{ ++ struct dentry *d; ++ ++ lockdep_off(); ++ d = lock_rename(d1, d2); ++ lockdep_on(); ++ return d; ++} ++ ++static inline void vfsub_unlock_rename(struct dentry *d1, struct dentry *d2) ++{ ++ lockdep_off(); ++ unlock_rename(d1, d2); ++ lockdep_on(); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int vfsub_notify_change(struct dentry *dentry, struct iattr *ia, int dlgt); ++int vfsub_unlink(struct inode *dir, struct dentry *dentry, int dlgt); ++int vfsub_statfs(void *arg, struct kstatfs *buf, int dlgt); ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_VFSUB_H__ */ +diff --git a/fs/aufs/whout.c b/fs/aufs/whout.c +new file mode 100755 +index 0000000..b7f874c +--- /dev/null ++++ b/fs/aufs/whout.c +@@ -0,0 +1,933 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either 
version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: whout.c,v 1.14 2007/05/14 03:40:40 sfjro Exp $ */ ++ ++#include <linux/fs.h> ++#include <linux/namei.h> ++#include <linux/random.h> ++#include <linux/security.h> ++#include "aufs.h" ++ ++#define WH_MASK S_IRUGO ++ ++/* If a directory contains this file, then it is opaque. We start with the ++ * .wh. flag so that it is blocked by lookup. ++ */ ++static struct qstr diropq_name = { ++ .name = AUFS_WH_DIROPQ, ++ .len = sizeof(AUFS_WH_DIROPQ) - 1 ++}; ++ ++/* ++ * generate whiteout name, which is NOT terminated by NULL. ++ * @name: original d_name.name ++ * @len: original d_name.len ++ * @wh: whiteout qstr ++ * returns zero when succeeds, otherwise error. ++ * succeeded value as wh->name should be freed by au_free_whname(). ++ */ ++int au_alloc_whname(const char *name, int len, struct qstr *wh) ++{ ++ char *p; ++ ++ DEBUG_ON(!name || !len || !wh); ++ ++ if (unlikely(len > PATH_MAX - AUFS_WH_PFX_LEN)) ++ return -ENAMETOOLONG; ++ ++ wh->len = len + AUFS_WH_PFX_LEN; ++ wh->name = p = kmalloc(wh->len, GFP_KERNEL); ++ //if (LktrCond) {kfree(p); wh->name = p = NULL;} ++ if (p) { ++ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN); ++ memcpy(p + AUFS_WH_PFX_LEN, name, len); ++ //smp_mb(); ++ return 0; ++ } ++ return -ENOMEM; ++} ++ ++void au_free_whname(struct qstr *wh) ++{ ++ DEBUG_ON(!wh || !wh->name); ++ kfree(wh->name); ++#ifdef CONFIG_AUFS_DEBUG ++ wh->name = NULL; ++#endif ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * test if the @wh_name exists under @hidden_parent. ++ * @try_sio specifies the necessary of super-io. ++ */ ++int is_wh(struct dentry *hidden_parent, struct qstr *wh_name, int try_sio, ++ struct lkup_args *lkup) ++{ ++ int err; ++ struct dentry *wh_dentry; ++ struct inode *hidden_dir; ++ ++ LKTRTrace("%.*s/%.*s, lkup{%p, %d}\n", DLNPair(hidden_parent), ++ wh_name->len, wh_name->name, lkup->nfsmnt, lkup->dlgt); ++ hidden_dir = hidden_parent->d_inode; ++ DEBUG_ON(!S_ISDIR(hidden_dir->i_mode)); ++ IMustLock(hidden_dir); ++ ++ if (!try_sio) ++ wh_dentry = lkup_one(wh_name->name, hidden_parent, ++ wh_name->len, lkup); ++ else ++ wh_dentry = sio_lkup_one(wh_name->name, hidden_parent, ++ wh_name->len, lkup); ++ //if (LktrCond) {dput(wh_dentry); wh_dentry = ERR_PTR(-1);} ++ err = PTR_ERR(wh_dentry); ++ if (IS_ERR(wh_dentry)) ++ goto out; ++ ++ err = 0; ++ if (!wh_dentry->d_inode) ++ goto out_wh; /* success */ ++ ++ err = 1; ++ if (S_ISREG(wh_dentry->d_inode->i_mode)) ++ goto out_wh; /* success */ ++ ++ err = -EIO; ++ IOErr("%.*s Invalid whiteout entry type 0%o.\n", ++ DLNPair(wh_dentry), wh_dentry->d_inode->i_mode); ++ ++ out_wh: ++ dput(wh_dentry); ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++/* ++ * test if the @hidden_dentry sets opaque or not. 
++ */ ++int is_diropq(struct dentry *hidden_dentry, struct lkup_args *lkup) ++{ ++ int err; ++ struct inode *hidden_dir; ++ ++ LKTRTrace("dentry %.*s\n", DLNPair(hidden_dentry)); ++ hidden_dir = hidden_dentry->d_inode; ++ DEBUG_ON(!S_ISDIR(hidden_dir->i_mode)); ++ IMustLock(hidden_dir); ++ ++ err = is_wh(hidden_dentry, &diropq_name, /*try_sio*/1, lkup); ++ TraceErr(err); ++ return err; ++} ++ ++/* ++ * returns a negative dentry whose name is unique and temporary. ++ */ ++struct dentry *lkup_whtmp(struct dentry *hidden_parent, struct qstr *prefix, ++ struct lkup_args *lkup) ++{ ++#define HEX_LEN 4 ++ struct dentry *dentry; ++ int len, i; ++ char defname[AUFS_WH_PFX_LEN * 2 + DNAME_INLINE_LEN_MIN + 1 ++ + HEX_LEN + 1], *name, *p; ++ static unsigned char cnt; ++ ++ LKTRTrace("hp %.*s, prefix %.*s\n", ++ DLNPair(hidden_parent), prefix->len, prefix->name); ++ DEBUG_ON(!hidden_parent->d_inode); ++ IMustLock(hidden_parent->d_inode); ++ ++ name = defname; ++ len = sizeof(defname) - DNAME_INLINE_LEN_MIN + prefix->len - 1; ++ if (unlikely(prefix->len > DNAME_INLINE_LEN_MIN)) { ++ dentry = ERR_PTR(-ENAMETOOLONG); ++ if (unlikely(len >= PATH_MAX)) ++ goto out; ++ dentry = ERR_PTR(-ENOMEM); ++ name = kmalloc(len + 1, GFP_KERNEL); ++ //if (LktrCond) {kfree(name); name = NULL;} ++ if (unlikely(!name)) ++ goto out; ++ } ++ ++ // doubly whiteout-ed ++ memcpy(name, AUFS_WH_PFX AUFS_WH_PFX, AUFS_WH_PFX_LEN * 2); ++ p = name + AUFS_WH_PFX_LEN * 2; ++ memcpy(p, prefix->name, prefix->len); ++ p += prefix->len; ++ *p++ = '.'; ++ DEBUG_ON(name + len + 1 - p <= HEX_LEN); ++ ++ for (i = 0; i < 3; i++) { ++ sprintf(p, "%.*d", HEX_LEN, cnt++); ++ dentry = sio_lkup_one(name, hidden_parent, len, lkup); ++ //if (LktrCond) {dput(dentry); dentry = ERR_PTR(-1);} ++ if (unlikely(IS_ERR(dentry) || !dentry->d_inode)) ++ goto out_name; ++ dput(dentry); ++ } ++ //Warn("could not get random name\n"); ++ dentry = ERR_PTR(-EEXIST); ++ Dbg("%.*s\n", len, name); ++ BUG(); ++ ++ out_name: ++ if (unlikely(name != defname)) ++ kfree(name); ++ out: ++ TraceErrPtr(dentry); ++ return dentry; ++#undef HEX_LEN ++} ++ ++/* ++ * rename the @dentry of @bindex to the whiteouted temporary name. 
++ */ ++int rename_whtmp(struct dentry *dentry, aufs_bindex_t bindex) ++{ ++ int err; ++ struct inode *hidden_dir; ++ struct dentry *hidden_dentry, *hidden_parent, *tmp_dentry; ++ struct super_block *sb; ++ struct lkup_args lkup; ++ ++ LKTRTrace("%.*s, b%d\n", DLNPair(dentry), bindex); ++ hidden_dentry = au_h_dptr_i(dentry, bindex); ++ DEBUG_ON(!hidden_dentry || !hidden_dentry->d_inode); ++ hidden_parent = hidden_dentry->d_parent; ++ hidden_dir = hidden_parent->d_inode; ++ IMustLock(hidden_dir); ++ ++ sb = dentry->d_sb; ++ lkup.nfsmnt = au_nfsmnt(sb, bindex); ++ lkup.dlgt = need_dlgt(sb); ++ tmp_dentry = lkup_whtmp(hidden_parent, &hidden_dentry->d_name, &lkup); ++ //if (LktrCond) {dput(tmp_dentry); tmp_dentry = ERR_PTR(-1);} ++ err = PTR_ERR(tmp_dentry); ++ if (!IS_ERR(tmp_dentry)) { ++ /* under the same dir, no need to lock_rename() */ ++ err = vfsub_rename(hidden_dir, hidden_dentry, ++ hidden_dir, tmp_dentry, lkup.dlgt); ++ //if (LktrCond) err = -1; //unavailable ++ TraceErr(err); ++ dput(tmp_dentry); ++ } ++ ++ TraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int au_unlink_wh_dentry(struct inode *hidden_dir, struct dentry *wh_dentry, ++ struct dentry *dentry, int dlgt) ++{ ++ int err; ++ ++ LKTRTrace("hi%lu, wh %.*s, d %p\n", hidden_dir->i_ino, ++ DLNPair(wh_dentry), dentry); ++ DEBUG_ON((dentry && dbwh(dentry) == -1) ++ || !wh_dentry->d_inode ++ || !S_ISREG(wh_dentry->d_inode->i_mode)); ++ IMustLock(hidden_dir); ++ ++ err = vfsub_unlink(hidden_dir, wh_dentry, dlgt); ++ //if (LktrCond) err = -1; // unavailable ++ if (!err && dentry) ++ set_dbwh(dentry, -1); ++ ++ TraceErr(err); ++ return err; ++} ++ ++static int unlink_wh_name(struct dentry *hidden_parent, struct qstr *wh, ++ struct lkup_args *lkup) ++{ ++ int err; ++ struct inode *hidden_dir; ++ struct dentry *hidden_dentry; ++ ++ LKTRTrace("%.*s/%.*s\n", DLNPair(hidden_parent), LNPair(wh)); ++ hidden_dir = hidden_parent->d_inode; ++ IMustLock(hidden_dir); ++ ++ // au_test_perm() is already done ++ hidden_dentry = lkup_one(wh->name, hidden_parent, wh->len, lkup); ++ //if (LktrCond) {dput(hidden_dentry); hidden_dentry = ERR_PTR(-1);} ++ if (!IS_ERR(hidden_dentry)) { ++ err = 0; ++ if (hidden_dentry->d_inode) ++ err = vfsub_unlink(hidden_dir, hidden_dentry, ++ lkup->dlgt); ++ dput(hidden_dentry); ++ } else ++ err = PTR_ERR(hidden_dentry); ++ ++ TraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static void clean_wh(struct inode *h_dir, struct dentry *wh) ++{ ++ TraceEnter(); ++ if (wh->d_inode) { ++ int err = vfsub_unlink(h_dir, wh, /*dlgt*/0); ++ if (unlikely(err)) ++ Warn("failed unlink %.*s (%d), ignored.\n", ++ DLNPair(wh), err); ++ } ++} ++ ++static void clean_plink(struct inode *h_dir, struct dentry *plink) ++{ ++ TraceEnter(); ++ if (plink->d_inode) { ++ int err = vfsub_rmdir(h_dir, plink, /*dlgt*/0); ++ if (unlikely(err)) ++ Warn("failed rmdir %.*s (%d), ignored.\n", ++ DLNPair(plink), err); ++ } ++} ++ ++static int test_linkable(struct inode *h_dir) ++{ ++ if (h_dir->i_op && h_dir->i_op->link) ++ return 0; ++ return -ENOSYS; ++} ++ ++static int plink_dir(struct inode *h_dir, struct dentry *plink) ++{ ++ int err; ++ ++ err = -EEXIST; ++ if (!plink->d_inode) { ++ int mode = S_IRWXU; ++ if (unlikely(au_is_nfs(plink->d_sb))) ++ mode |= S_IXUGO; ++ err = vfsub_mkdir(h_dir, plink, mode, /*dlgt*/0); ++ } else if (S_ISDIR(plink->d_inode->i_mode)) ++ err = 0; ++ else ++ Err("unknown %.*s exists\n", 
DLNPair(plink)); ++ ++ return err; ++} ++ ++/* ++ * initialize the whiteout base file/dir for @br. ++ */ ++int init_wh(struct dentry *h_root, struct aufs_branch *br, ++ struct vfsmount *nfsmnt, struct super_block *sb) ++{ ++ int err; ++ struct dentry *wh, *plink; ++ struct inode *h_dir; ++ static struct qstr base_name[] = { ++ {.name = AUFS_WH_BASENAME, .len = sizeof(AUFS_WH_BASENAME) - 1}, ++ {.name = AUFS_WH_PLINKDIR, .len = sizeof(AUFS_WH_PLINKDIR) - 1} ++ }; ++ struct lkup_args lkup = { ++ .nfsmnt = nfsmnt, ++ .dlgt = 0 // always no dlgt ++ }; ++ const int do_plink = au_flag_test(sb, AuFlag_PLINK); ++ ++ LKTRTrace("nfsmnt %p\n", nfsmnt); ++ BrWhMustWriteLock(br); ++ SiMustWriteLock(sb); ++ h_dir = h_root->d_inode; ++ IMustLock(h_dir); ++ ++ // doubly whiteouted ++ wh = lkup_wh(h_root, base_name + 0, &lkup); ++ //if (LktrCond) {dput(wh); wh = ERR_PTR(-1);} ++ err = PTR_ERR(wh); ++ if (IS_ERR(wh)) ++ goto out; ++ DEBUG_ON(br->br_wh && br->br_wh != wh); ++ ++ plink = lkup_wh(h_root, base_name + 1, &lkup); ++ err = PTR_ERR(plink); ++ if (IS_ERR(plink)) ++ goto out_dput_wh; ++ DEBUG_ON(br->br_plink && br->br_plink != plink); ++ ++ dput(br->br_wh); ++ dput(br->br_plink); ++ br->br_wh = br->br_plink = NULL; ++ ++ err = 0; ++ switch (br->br_perm) { ++ case AuBr_RR: ++ case AuBr_RO: ++ case AuBr_RRWH: ++ case AuBr_ROWH: ++ clean_wh(h_dir, wh); ++ clean_plink(h_dir, plink); ++ break; ++ ++ case AuBr_RWNoLinkWH: ++ clean_wh(h_dir, wh); ++ if (do_plink) { ++ err = test_linkable(h_dir); ++ if (unlikely(err)) ++ goto out_nolink; ++ ++ err = plink_dir(h_dir, plink); ++ if (unlikely(err)) ++ goto out_err; ++ br->br_plink = dget(plink); ++ } else ++ clean_plink(h_dir, plink); ++ break; ++ ++ case AuBr_RW: ++ /* ++ * for the moment, aufs supports the branch filesystem ++ * which does not support link(2). ++ * testing on FAT which does not support i_op->setattr() fully either, ++ * copyup failed. ++ * finally, such filesystem will not be used as the writable branch. 
++ */ ++ err = test_linkable(h_dir); ++ if (unlikely(err)) ++ goto out_nolink; ++ ++ err = -EEXIST; ++ if (!wh->d_inode) ++ err = vfsub_create(h_dir, wh, WH_MASK, NULL, /*dlgt*/0); ++ else if (S_ISREG(wh->d_inode->i_mode)) ++ err = 0; ++ else ++ Err("unknown %.*s/%.*s exists\n", ++ DLNPair(h_root), DLNPair(wh)); ++ if (unlikely(err)) ++ goto out_err; ++ ++ if (do_plink) { ++ err = plink_dir(h_dir, plink); ++ if (unlikely(err)) ++ goto out_err; ++ br->br_plink = dget(plink); ++ } else ++ clean_plink(h_dir, plink); ++ br->br_wh = dget(wh); ++ break; ++ ++ default: ++ BUG(); ++ } ++ ++ out_dput: ++ dput(plink); ++ out_dput_wh: ++ dput(wh); ++ out: ++ TraceErr(err); ++ return err; ++ out_nolink: ++ Err("%.*s doesn't support link(2), use noplink and rw+nolwh\n", ++ DLNPair(h_root)); ++ goto out_dput; ++ out_err: ++ Err("an error(%d) on the writable branch %.*s(%s)\n", ++ err, DLNPair(h_root), au_sbtype(h_root->d_sb)); ++ goto out_dput; ++} ++ ++struct reinit_br_wh { ++ struct super_block *sb; ++ struct aufs_branch *br; ++}; ++ ++static void reinit_br_wh(void *arg) ++{ ++ int err; ++ struct reinit_br_wh *a = arg; ++ struct inode *hidden_dir, *dir; ++ struct dentry *hidden_root; ++ aufs_bindex_t bindex; ++ ++ TraceEnter(); ++ DEBUG_ON(!a->br->br_wh || !a->br->br_wh->d_inode || current->fsuid); ++ ++ err = 0; ++ /* big lock */ ++ si_write_lock(a->sb); ++ if (unlikely(!br_writable(a->br->br_perm))) ++ goto out; ++ bindex = find_brindex(a->sb, a->br->br_id); ++ if (unlikely(bindex < 0)) ++ goto out; ++ ++ dir = a->sb->s_root->d_inode; ++ hidden_root = a->br->br_wh->d_parent; ++ hidden_dir = hidden_root->d_inode; ++ DEBUG_ON(!hidden_dir->i_op || !hidden_dir->i_op->link); ++ hdir_lock(hidden_dir, dir, bindex); ++ br_wh_write_lock(a->br); ++ err = vfsub_unlink(hidden_dir, a->br->br_wh, /*dlgt*/0); ++ //if (LktrCond) err = -1; ++ dput(a->br->br_wh); ++ a->br->br_wh = NULL; ++ if (!err) ++ err = init_wh(hidden_root, a->br, au_do_nfsmnt(a->br->br_mnt), ++ a->sb); ++ br_wh_write_unlock(a->br); ++ hdir_unlock(hidden_dir, dir, bindex); ++ ++ out: ++ atomic_dec(&a->br->br_wh_running); ++ br_put(a->br); ++ si_write_unlock(a->sb); ++ au_mntput(a->sb); ++ kfree(arg); ++ if (unlikely(err)) ++ IOErr("err %d\n", err); ++} ++ ++static void kick_reinit_br_wh(struct super_block *sb, struct aufs_branch *br) ++{ ++ int do_dec; ++ struct reinit_br_wh *arg; ++ ++ do_dec = 1; ++ if (atomic_inc_return(&br->br_wh_running) != 1) ++ goto out; ++ ++ // ignore ENOMEM ++ arg = kmalloc(sizeof(*arg), GFP_KERNEL); ++ if (arg) { ++ // dec(wh_running), kfree(arg) and br_put() in reinit function ++ arg->sb = sb; ++ arg->br = br; ++ br_get(br); ++ /* prohibit umount */ ++ au_mntget(sb); ++ au_wkq_nowait(reinit_br_wh, arg, /*dlgt*/0); ++ do_dec = 0; ++ } ++ ++ out: ++ if (do_dec) ++ atomic_dec(&br->br_wh_running); ++} ++ ++/* ++ * create the whiteoute @wh. ++ */ ++static int link_or_create_wh(struct dentry *wh, struct super_block *sb, ++ aufs_bindex_t bindex) ++{ ++ int err, dlgt; ++ struct aufs_branch *br; ++ struct dentry *hidden_parent; ++ struct inode *hidden_dir; ++ ++ LKTRTrace("%.*s\n", DLNPair(wh)); ++ SiMustReadLock(sb); ++ hidden_parent = wh->d_parent; ++ hidden_dir = hidden_parent->d_inode; ++ IMustLock(hidden_dir); ++ ++ dlgt = need_dlgt(sb); ++ br = stobr(sb, bindex); ++ br_wh_read_lock(br); ++ if (br->br_wh) { ++ err = vfsub_link(br->br_wh, hidden_dir, wh, dlgt); ++ if (!err || err != -EMLINK) ++ goto out; ++ ++ // link count full. re-initialize br_wh. 
++ kick_reinit_br_wh(sb, br); ++ } ++ ++ // return this error in this context ++ err = vfsub_create(hidden_dir, wh, WH_MASK, NULL, dlgt); ++ ++ out: ++ br_wh_read_unlock(br); ++ TraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * create or remove the diropq. ++ */ ++static struct dentry *do_diropq(struct dentry *dentry, aufs_bindex_t bindex, ++ int do_create, int dlgt) ++{ ++ struct dentry *opq_dentry, *hidden_dentry; ++ struct inode *hidden_dir; ++ int err; ++ struct super_block *sb; ++ struct lkup_args lkup; ++ ++ LKTRTrace("%.*s, bindex %d, do_create %d\n", DLNPair(dentry), ++ bindex, do_create); ++ hidden_dentry = au_h_dptr_i(dentry, bindex); ++ DEBUG_ON(!hidden_dentry); ++ hidden_dir = hidden_dentry->d_inode; ++ DEBUG_ON(!hidden_dir || !S_ISDIR(hidden_dir->i_mode)); ++ IMustLock(hidden_dir); ++ ++ // already checked by au_test_perm(). ++ sb = dentry->d_sb; ++ lkup.nfsmnt = au_nfsmnt(sb, bindex); ++ lkup.dlgt = dlgt; ++ opq_dentry = lkup_one(diropq_name.name, hidden_dentry, diropq_name.len, ++ &lkup); ++ //if (LktrCond) {dput(opq_dentry); opq_dentry = ERR_PTR(-1);} ++ if (IS_ERR(opq_dentry)) ++ goto out; ++ ++ if (do_create) { ++ DEBUG_ON(opq_dentry->d_inode); ++ err = link_or_create_wh(opq_dentry, sb, bindex); ++ //if (LktrCond) {vfs_unlink(hidden_dir, opq_dentry); err = -1;} ++ if (!err) { ++ set_dbdiropq(dentry, bindex); ++ goto out; /* success */ ++ } ++ } else { ++ DEBUG_ON(/* !S_ISDIR(dentry->d_inode->i_mode) ++ * || */!opq_dentry->d_inode); ++ err = vfsub_unlink(hidden_dir, opq_dentry, lkup.dlgt); ++ //if (LktrCond) err = -1; ++ if (!err) ++ set_dbdiropq(dentry, -1); ++ } ++ dput(opq_dentry); ++ opq_dentry = ERR_PTR(err); ++ ++ out: ++ TraceErrPtr(opq_dentry); ++ return opq_dentry; ++} ++ ++struct do_diropq_args { ++ struct dentry **errp; ++ struct dentry *dentry; ++ aufs_bindex_t bindex; ++ int do_create, dlgt; ++}; ++ ++static void call_do_diropq(void *args) ++{ ++ struct do_diropq_args *a = args; ++ *a->errp = do_diropq(a->dentry, a->bindex, a->do_create, a->dlgt); ++} ++ ++struct dentry *sio_diropq(struct dentry *dentry, aufs_bindex_t bindex, ++ int do_create, int dlgt) ++{ ++ struct dentry *diropq, *hidden_dentry; ++ ++ LKTRTrace("%.*s, bindex %d, do_create %d\n", ++ DLNPair(dentry), bindex, do_create); ++ ++ hidden_dentry = au_h_dptr_i(dentry, bindex); ++ if (!au_test_perm(hidden_dentry->d_inode, MAY_EXEC | MAY_WRITE, dlgt)) ++ diropq = do_diropq(dentry, bindex, do_create, dlgt); ++ else { ++ struct do_diropq_args args = { ++ .errp = &diropq, ++ .dentry = dentry, ++ .bindex = bindex, ++ .do_create = do_create, ++ .dlgt = dlgt ++ }; ++ au_wkq_wait(call_do_diropq, &args, /*dlgt*/0); ++ } ++ ++ TraceErrPtr(diropq); ++ return diropq; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * lookup whiteout dentry. ++ * @hidden_parent: hidden parent dentry which must exist and be locked ++ * @base_name: name of dentry which will be whiteouted ++ * returns dentry for whiteout. ++ */ ++struct dentry *lkup_wh(struct dentry *hidden_parent, struct qstr *base_name, ++ struct lkup_args *lkup) ++{ ++ int err; ++ struct qstr wh_name; ++ struct dentry *wh_dentry; ++ ++ LKTRTrace("%.*s/%.*s\n", DLNPair(hidden_parent), LNPair(base_name)); ++ IMustLock(hidden_parent->d_inode); ++ ++ err = au_alloc_whname(base_name->name, base_name->len, &wh_name); ++ //if (LktrCond) {au_free_whname(&wh_name); err = -1;} ++ wh_dentry = ERR_PTR(err); ++ if (!err) { ++ // do not superio. 
++ wh_dentry = lkup_one(wh_name.name, hidden_parent, wh_name.len, ++ lkup); ++ au_free_whname(&wh_name); ++ } ++ TraceErrPtr(wh_dentry); ++ return wh_dentry; ++} ++ ++/* ++ * link/create a whiteout for @dentry on @bindex. ++ */ ++struct dentry *simple_create_wh(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *hidden_parent, ++ struct lkup_args *lkup) ++{ ++ struct dentry *wh_dentry; ++ int err; ++ struct super_block *sb; ++ ++ LKTRTrace("%.*s/%.*s on b%d\n", DLNPair(hidden_parent), ++ DLNPair(dentry), bindex); ++ ++ sb = dentry->d_sb; ++ wh_dentry = lkup_wh(hidden_parent, &dentry->d_name, lkup); ++ //au_nfsmnt(sb, bindex), need_dlgt(sb)); ++ //if (LktrCond) {dput(wh_dentry); wh_dentry = ERR_PTR(-1);} ++ if (!IS_ERR(wh_dentry) && !wh_dentry->d_inode) { ++ IMustLock(hidden_parent->d_inode); ++ err = link_or_create_wh(wh_dentry, sb, bindex); ++ if (!err) ++ set_dbwh(dentry, bindex); ++ else { ++ dput(wh_dentry); ++ wh_dentry = ERR_PTR(err); ++ } ++ } ++ ++ TraceErrPtr(wh_dentry); ++ return wh_dentry; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* Delete all whiteouts in this directory in branch bindex. */ ++static int del_wh_children(struct aufs_nhash *whlist, ++ struct dentry *hidden_parent, aufs_bindex_t bindex, ++ struct lkup_args *lkup) ++{ ++ int err, i; ++ struct qstr wh_name; ++ char *p; ++ struct inode *hidden_dir; ++ struct hlist_head *head; ++ struct aufs_wh *tpos; ++ struct hlist_node *pos; ++ struct aufs_destr *str; ++ ++ LKTRTrace("%.*s\n", DLNPair(hidden_parent)); ++ hidden_dir = hidden_parent->d_inode; ++ IMustLock(hidden_dir); ++ DEBUG_ON(IS_RDONLY(hidden_dir)); ++ //SiMustReadLock(??); ++ ++ err = -ENOMEM; ++ wh_name.name = p = __getname(); ++ //if (LktrCond) {__putname(p); wh_name.name = p = NULL;} ++ if (unlikely(!wh_name.name)) ++ goto out; ++ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN); ++ p += AUFS_WH_PFX_LEN; ++ ++ // already checked by au_test_perm(). ++ err = 0; ++ for (i = 0; !err && i < AUFS_NHASH_SIZE; i++) { ++ head = whlist->heads + i; ++ hlist_for_each_entry(tpos, pos, head, wh_hash) { ++ if (tpos->wh_bindex != bindex) ++ continue; ++ str = &tpos->wh_str; ++ if (str->len + AUFS_WH_PFX_LEN <= PATH_MAX) { ++ memcpy(p, str->name, str->len); ++ wh_name.len = AUFS_WH_PFX_LEN + str->len; ++ err = unlink_wh_name(hidden_parent, &wh_name, ++ lkup); ++ //if (LktrCond) err = -1; ++ if (!err) ++ continue; ++ break; ++ } ++ IOErr("whiteout name too long %.*s\n", ++ str->len, str->name); ++ err = -EIO; ++ break; ++ } ++ } ++ __putname(wh_name.name); ++ ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++struct del_wh_children_args { ++ int *errp; ++ struct aufs_nhash *whlist; ++ struct dentry *hidden_parent; ++ aufs_bindex_t bindex; ++ struct lkup_args *lkup; ++}; ++ ++static void call_del_wh_children(void *args) ++{ ++ struct del_wh_children_args *a = args; ++ *a->errp = del_wh_children(a->whlist, a->hidden_parent, a->bindex, ++ a->lkup); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * rmdir the whiteouted temporary named dir @hidden_dentry. ++ * @whlist: whiteouted children. 
++ */ ++int rmdir_whtmp(struct dentry *hidden_dentry, struct aufs_nhash *whlist, ++ aufs_bindex_t bindex, struct inode *dir, struct inode *inode) ++{ ++ int err; ++ struct inode *hidden_inode, *hidden_dir; ++ struct lkup_args lkup; ++ struct super_block *sb; ++ ++ LKTRTrace("hd %.*s, b%d, i%lu\n", ++ DLNPair(hidden_dentry), bindex, dir->i_ino); ++ IMustLock(dir); ++ IiMustAnyLock(dir); ++ hidden_dir = hidden_dentry->d_parent->d_inode; ++ IMustLock(hidden_dir); ++ ++ sb = inode->i_sb; ++ lkup.nfsmnt = au_nfsmnt(sb, bindex); ++ lkup.dlgt = need_dlgt(sb); ++ hidden_inode = hidden_dentry->d_inode; ++ DEBUG_ON(hidden_inode != au_h_iptr_i(inode, bindex)); ++ hdir2_lock(hidden_inode, inode, bindex); ++ if (!au_test_perm(hidden_inode, MAY_EXEC | MAY_WRITE, lkup.dlgt)) ++ err = del_wh_children(whlist, hidden_dentry, bindex, &lkup); ++ else { ++ // ugly ++ int dlgt = lkup.dlgt; ++ struct del_wh_children_args args = { ++ .errp = &err, ++ .whlist = whlist, ++ .hidden_parent = hidden_dentry, ++ .bindex = bindex, ++ .lkup = &lkup ++ }; ++ ++ lkup.dlgt = 0; ++ au_wkq_wait(call_del_wh_children, &args, /*dlgt*/0); ++ lkup.dlgt = dlgt; ++ } ++ hdir_unlock(hidden_inode, inode, bindex); ++ ++ if (!err) { ++ err = vfsub_rmdir(hidden_dir, hidden_dentry, lkup.dlgt); ++ //d_drop(hidden_dentry); ++ //if (LktrCond) err = -1; ++ } ++ ++ if (!err) { ++ if (ibstart(dir) == bindex) { ++ au_cpup_attr_timesizes(dir); ++ //au_cpup_attr_nlink(dir); ++ dir->i_nlink--; ++ } ++ return 0; /* success */ ++ } ++ ++ Warn("failed removing %.*s(%d), ignored\n", ++ DLNPair(hidden_dentry), err); ++ return err; ++} ++ ++static void do_rmdir_whtmp(void *arg) ++{ ++ int err; ++ struct rmdir_whtmp_arg *a = arg; ++ struct super_block *sb; ++ ++ LKTRTrace("%.*s, b%d, dir i%lu\n", ++ DLNPair(a->h_dentry), a->bindex, a->dir->i_ino); ++ ++ i_lock(a->dir); ++ sb = a->dir->i_sb; ++ si_read_lock(sb); ++ err = test_ro(sb, a->bindex, NULL); ++ if (!err) { ++ struct inode *hidden_dir = a->h_dentry->d_parent->d_inode; ++ ++ ii_write_lock_child(a->inode); ++ ii_write_lock_parent(a->dir); ++ hdir_lock(hidden_dir, a->dir, a->bindex); ++ err = rmdir_whtmp(a->h_dentry, &a->whlist, a->bindex, ++ a->dir, a->inode); ++ hdir_unlock(hidden_dir, a->dir, a->bindex); ++ ii_write_unlock(a->dir); ++ ii_write_unlock(a->inode); ++ } ++ dput(a->h_dentry); ++ nhash_fin(&a->whlist); ++ iput(a->inode); ++ si_read_unlock(sb); ++ au_mntput(sb); ++ i_unlock(a->dir); ++ iput(a->dir); ++ kfree(arg); ++ if (unlikely(err)) ++ IOErr("err %d\n", err); ++} ++ ++void kick_rmdir_whtmp(struct dentry *hidden_dentry, struct aufs_nhash *whlist, ++ aufs_bindex_t bindex, struct inode *dir, ++ struct inode *inode, struct rmdir_whtmp_arg *arg) ++{ ++ LKTRTrace("%.*s\n", DLNPair(hidden_dentry)); ++ IMustLock(dir); ++ ++ // all post-process will be done in do_rmdir_whtmp(). 
++ arg->h_dentry = dget(hidden_dentry); ++ nhash_init(&arg->whlist); ++ nhash_move(&arg->whlist, whlist); ++ arg->bindex = bindex; ++ arg->dir = igrab(dir); ++ arg->inode = igrab(inode); ++ /* prohibit umount */ ++ au_mntget(dir->i_sb); ++ ++ au_wkq_nowait(do_rmdir_whtmp, arg, /*dlgt*/0); ++} +diff --git a/fs/aufs/whout.h b/fs/aufs/whout.h +new file mode 100755 +index 0000000..d44c3cd +--- /dev/null ++++ b/fs/aufs/whout.h +@@ -0,0 +1,87 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: whout.h,v 1.8 2007/05/14 03:41:52 sfjro Exp $ */ ++ ++#ifndef __AUFS_WHOUT_H__ ++#define __AUFS_WHOUT_H__ ++ ++#ifdef __KERNEL__ ++ ++#include <linux/fs.h> ++#include <linux/aufs_type.h> ++ ++int au_alloc_whname(const char *name, int len, struct qstr *wh); ++void au_free_whname(struct qstr *wh); ++ ++struct lkup_args; ++int is_wh(struct dentry *h_parent, struct qstr *wh_name, int try_sio, ++ struct lkup_args *lkup); ++int is_diropq(struct dentry *h_dentry, struct lkup_args *lkup); ++ ++struct dentry *lkup_whtmp(struct dentry *h_parent, struct qstr *prefix, ++ struct lkup_args *lkup); ++int rename_whtmp(struct dentry *dentry, aufs_bindex_t bindex); ++int au_unlink_wh_dentry(struct inode *h_dir, struct dentry *wh_dentry, ++ struct dentry *dentry, int dlgt); ++ ++struct aufs_branch; ++int init_wh(struct dentry *h_parent, struct aufs_branch *br, ++ struct vfsmount *nfsmnt, struct super_block *sb); ++ ++struct dentry *sio_diropq(struct dentry *dentry, aufs_bindex_t bindex, ++ int do_create, int dlgt); ++ ++struct dentry *lkup_wh(struct dentry *h_parent, struct qstr *base_name, ++ struct lkup_args *lkup); ++struct dentry *simple_create_wh(struct dentry *dentry, aufs_bindex_t bindex, ++ struct dentry *h_parent, ++ struct lkup_args *lkup); ++ ++/* real rmdir the whiteout-ed dir */ ++struct rmdir_whtmp_arg { ++ struct dentry *h_dentry; ++ struct aufs_nhash whlist; ++ aufs_bindex_t bindex; ++ struct inode *dir, *inode; ++}; ++ ++struct aufs_nhash; ++int rmdir_whtmp(struct dentry *h_dentry, struct aufs_nhash *whlist, ++ aufs_bindex_t bindex, struct inode *dir, struct inode *inode); ++void kick_rmdir_whtmp(struct dentry *h_dentry, struct aufs_nhash *whlist, ++ aufs_bindex_t bindex, struct inode *dir, ++ struct inode *inode, struct rmdir_whtmp_arg *arg); ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline ++struct dentry *create_diropq(struct dentry *dentry, aufs_bindex_t bindex, ++ int dlgt) ++{ ++ return sio_diropq(dentry, bindex, 1, dlgt); ++} ++ ++static inline ++int remove_diropq(struct dentry *dentry, aufs_bindex_t bindex, int dlgt) ++{ ++ return PTR_ERR(sio_diropq(dentry, bindex, 0, dlgt)); ++} ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_WHOUT_H__ */ +diff --git a/fs/aufs/wkq.c b/fs/aufs/wkq.c +new 
file mode 100755 +index 0000000..b5ab023 +--- /dev/null ++++ b/fs/aufs/wkq.c +@@ -0,0 +1,283 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: wkq.c,v 1.14 2007/05/14 03:39:10 sfjro Exp $ */ ++ ++#include <linux/module.h> ++#include "aufs.h" ++ ++struct au_wkq *au_wkq; ++ ++struct au_cred { ++#ifdef CONFIG_AUFS_DLGT ++ uid_t fsuid; ++ gid_t fsgid; ++ kernel_cap_t cap_effective, cap_inheritable, cap_permitted; ++ //unsigned keep_capabilities:1; ++ //struct user_struct *user; ++ //struct fs_struct *fs; ++ //struct nsproxy *nsproxy; ++#endif ++}; ++ ++struct au_wkinfo { ++ struct work_struct wk; ++ ++ unsigned int wait:1; ++ unsigned int dlgt:1; ++ struct au_cred cred; ++ ++ au_wkq_func_t func; ++ void *args; ++ ++ atomic_t *busyp; ++ struct completion *comp; ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_AUFS_DLGT ++static void cred_store(struct au_cred *cred) ++{ ++ cred->fsuid = current->fsuid; ++ cred->fsgid = current->fsgid; ++ cred->cap_effective = current->cap_effective; ++ cred->cap_inheritable = current->cap_inheritable; ++ cred->cap_permitted = current->cap_permitted; ++} ++ ++static void cred_revert(struct au_cred *cred) ++{ ++ DEBUG_ON(!is_au_wkq(current)); ++ current->fsuid = cred->fsuid; ++ current->fsgid = cred->fsgid; ++ current->cap_effective = cred->cap_effective; ++ current->cap_inheritable = cred->cap_inheritable; ++ current->cap_permitted = cred->cap_permitted; ++} ++ ++static void cred_switch(struct au_cred *old, struct au_cred *new) ++{ ++ cred_store(old); ++ cred_revert(new); ++} ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++static void update_busy(struct au_wkq *wkq, struct au_wkinfo *wkinfo) ++{ ++#ifdef CONFIG_AUFS_SYSAUFS ++ unsigned int new, old; ++ ++ do { ++ new = atomic_read(wkinfo->busyp); ++ old = wkq->max_busy; ++ if (new <= old) ++ break; ++ } while (cmpxchg(&wkq->max_busy, old, new) == old); ++#endif ++} ++ ++static int enqueue(struct au_wkq *wkq, struct au_wkinfo *wkinfo) ++{ ++ wkinfo->busyp = &wkq->busy; ++ update_busy(wkq, wkinfo); ++ if (wkinfo->wait) ++ return !queue_work(wkq->q, &wkinfo->wk); ++ else ++ return !schedule_work(&wkinfo->wk); ++} ++ ++static void do_wkq(struct au_wkinfo *wkinfo) ++{ ++ unsigned int idle, n; ++ int i, idle_idx; ++ ++ TraceEnter(); ++ ++ while (1) { ++ if (wkinfo->wait) { ++ idle_idx = 0; ++ idle = UINT_MAX; ++ for (i = 0; i < aufs_nwkq; i++) { ++ n = atomic_inc_return(&au_wkq[i].busy); ++ if (n == 1 && !enqueue(au_wkq + i, wkinfo)) ++ return; /* success */ ++ ++ if (n < idle) { ++ idle_idx = i; ++ idle = n; ++ } ++ atomic_dec(&au_wkq[i].busy); ++ } ++ } else ++ idle_idx = aufs_nwkq; ++ ++ atomic_inc(&au_wkq[idle_idx].busy); ++ if (!enqueue(au_wkq + 
idle_idx, wkinfo)) ++ return; /* success */ ++ ++ /* impossible? */ ++ Warn1("failed to queue_work()\n"); ++ yield(); ++ } ++} ++ ++static AuWkqFunc(wkq_func, wk) ++{ ++ struct au_wkinfo *wkinfo = container_of(wk, struct au_wkinfo, wk); ++ ++ LKTRTrace("wkinfo{%u, %u, %p, %p, %p}\n", ++ wkinfo->wait, wkinfo->dlgt, wkinfo->func, wkinfo->busyp, ++ wkinfo->comp); ++#ifdef CONFIG_AUFS_DLGT ++ if (!wkinfo->dlgt) ++ wkinfo->func(wkinfo->args); ++ else { ++ struct au_cred cred; ++ cred_switch(&cred, &wkinfo->cred); ++ wkinfo->func(wkinfo->args); ++ cred_revert(&cred); ++ } ++#else ++ wkinfo->func(wkinfo->args); ++#endif ++ atomic_dec(wkinfo->busyp); ++ if (wkinfo->wait) ++ complete(wkinfo->comp); ++ else { ++ kfree(wkinfo); ++ module_put(THIS_MODULE); ++ } ++} ++ ++void au_wkq_run(au_wkq_func_t func, void *args, int dlgt, int do_wait) ++{ ++ DECLARE_COMPLETION_ONSTACK(comp); ++ struct au_wkinfo _wkinfo = { ++ .wait = 1, ++ .dlgt = !!dlgt, ++ .func = func, ++ .args = args, ++ .comp = &comp ++ }, *wkinfo = &_wkinfo; ++ ++ LKTRTrace("dlgt %d, do_wait %d\n", dlgt, do_wait); ++ DEBUG_ON(is_au_wkq(current)); ++ ++ if (unlikely(!do_wait)) { ++ static DECLARE_WAIT_QUEUE_HEAD(wq); ++ /* ++ * never fail. ++ * wkq_func() must free this wkinfo. ++ * it highly depends upon the implementation of workqueue. ++ */ ++ wait_event(wq, (wkinfo = kmalloc(sizeof(*wkinfo), GFP_KERNEL))); ++ wkinfo->wait = 0; ++ wkinfo->dlgt = !!dlgt; ++ wkinfo->func = func; ++ wkinfo->args = args; ++ wkinfo->comp = NULL; ++ __module_get(THIS_MODULE); ++ } ++ ++ AuInitWkq(&wkinfo->wk, wkq_func); ++#ifdef CONFIG_AUFS_DLGT ++ if (dlgt) ++ cred_store(&wkinfo->cred); ++#endif ++ do_wkq(wkinfo); ++ if (do_wait) ++ wait_for_completion(wkinfo->comp); ++} ++ ++#if 0 ++void au_wkq_wait_nwtask(void) ++{ ++ static DECLARE_WAIT_QUEUE_HEAD(wq); ++ wait_event(wq, !atomic_read(&au_wkq[aufs_nwkq].busy)); ++} ++#endif ++ ++void au_wkq_fin(void) ++{ ++ int i; ++ ++ TraceEnter(); ++ ++ for (i = 0; i < aufs_nwkq; i++) ++ if (au_wkq[i].q && !IS_ERR(au_wkq[i].q)) ++ destroy_workqueue(au_wkq[i].q); ++ kfree(au_wkq); ++} ++ ++int __init au_wkq_init(void) ++{ ++ int err, i; ++ struct au_wkq *nowaitq; ++ ++ LKTRTrace("%d\n", aufs_nwkq); ++ ++ /* '+1' is for accounting of nowait queue */ ++ err = -ENOMEM; ++ au_wkq = kcalloc(aufs_nwkq + 1, sizeof(*au_wkq), GFP_KERNEL); ++ if (unlikely(!au_wkq)) ++ goto out; ++ ++ err = 0; ++ for (i = 0; i < aufs_nwkq; i++) { ++ au_wkq[i].q = create_singlethread_workqueue(AUFS_WKQ_NAME); ++ if (au_wkq[i].q && !IS_ERR(au_wkq[i].q)) { ++ atomic_set(&au_wkq[i].busy, 0); ++ au_wkq[i].max_busy = 0; ++ continue; ++ } ++ ++ err = PTR_ERR(au_wkq[i].q); ++ au_wkq_fin(); ++ break; ++ } ++ ++ /* nowait accounting */ ++ nowaitq = au_wkq + aufs_nwkq; ++ atomic_set(&nowaitq->busy, 0); ++ nowaitq->max_busy = 0; ++ nowaitq->q = NULL; ++ ++#if 0 // test accouting ++ if (!err) { ++ static void f(void *args) { ++ DbgSleep(1); ++ } ++ int i; ++ //au_debug_on(); ++ LKTRTrace("f %p\n", f); ++ for (i = 0; i < 10; i++) ++ au_wkq_nowait(f, NULL, 0); ++ for (i = 0; i < aufs_nwkq; i++) ++ au_wkq_wait(f, NULL, 0); ++ DbgSleep(11); ++ //au_debug_off(); ++ } ++#endif ++ ++ out: ++ TraceErr(err); ++ return err; ++} +diff --git a/fs/aufs/wkq.h b/fs/aufs/wkq.h +new file mode 100755 +index 0000000..cc1bb25 +--- /dev/null ++++ b/fs/aufs/wkq.h +@@ -0,0 +1,81 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as 
published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: wkq.h,v 1.9 2007/05/14 03:39:10 sfjro Exp $ */ ++ ++#ifndef __AUFS_WKQ_H__ ++#define __AUFS_WKQ_H__ ++ ++#ifdef __KERNEL__ ++ ++#include <linux/sched.h> ++#include <linux/version.h> ++#include <linux/workqueue.h> ++ ++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) ++#define DECLARE_COMPLETION_ONSTACK(work) DECLARE_COMPLETION(work) ++#endif ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* internal workqueue named AUFS_WKQ_NAME */ ++struct au_wkq { ++ struct workqueue_struct *q; ++ ++ /* accounting */ ++ atomic_t busy; ++ unsigned int max_busy; ++} ;//__attribute__ ((aligned)); ++ ++typedef void (*au_wkq_func_t)(void *args); ++ ++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) ++#define AuInitWkq(wk, func) INIT_WORK(wk, func) ++#define AuWkqFunc(name, arg) void name(struct work_struct *arg) ++#else ++typedef void (*work_func_t)(void *arg); ++#define AuInitWkq(wk, func) INIT_WORK(wk, func, wk) ++#define AuWkqFunc(name, arg) void name(void *arg) ++#endif ++ ++extern struct au_wkq *au_wkq; ++ ++void au_wkq_run(au_wkq_func_t func, void *args, int dlgt, int do_wait); ++//void au_wkq_wait_nwtask(void); ++int __init au_wkq_init(void); ++void au_wkq_fin(void); ++ ++/* ---------------------------------------------------------------------- */ ++ ++static inline int is_au_wkq(struct task_struct *tsk) ++{ ++ return (!tsk->mm && !strcmp(current->comm, AUFS_WKQ_NAME)); ++} ++ ++static inline void au_wkq_wait(au_wkq_func_t func, void *args, int dlgt) ++{ ++ au_wkq_run(func, args, dlgt, /*do_wait*/1); ++} ++ ++static inline void au_wkq_nowait(au_wkq_func_t func, void *args, int dlgt) ++{ ++ au_wkq_run(func, args, dlgt, /*do_wait*/0); ++} ++ ++#endif /* __KERNEL__ */ ++#endif /* __AUFS_WKQ_H__ */ +diff --git a/fs/aufs/xino.c b/fs/aufs/xino.c +new file mode 100755 +index 0000000..145491e +--- /dev/null ++++ b/fs/aufs/xino.c +@@ -0,0 +1,644 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: xino.c,v 1.27 2007/05/14 03:39:10 sfjro Exp $ */ ++ ++//#include <linux/fs.h> ++#include <linux/fsnotify.h> ++#include <asm/uaccess.h> ++#include "aufs.h" ++ ++static readf_t find_readf(struct file *h_file) ++{ ++ const struct file_operations *fop = h_file->f_op; ++ ++ if (fop) { ++ if (fop->read) ++ return fop->read; ++ if (fop->aio_read) ++ return do_sync_read; ++ } ++ return ERR_PTR(-ENOSYS); ++} ++ ++static writef_t find_writef(struct file *h_file) ++{ ++ const struct file_operations *fop = h_file->f_op; ++ ++ if (fop) { ++ if (fop->write) ++ return fop->write; ++ if (fop->aio_write) ++ return do_sync_write; ++ } ++ return ERR_PTR(-ENOSYS); ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static ssize_t xino_fread(readf_t func, struct file *file, void *buf, ++ size_t size, loff_t *pos) ++{ ++ ssize_t err; ++ mm_segment_t oldfs; ++ ++ LKTRTrace("%.*s, sz %lu, *pos %Ld\n", ++ DLNPair(file->f_dentry), (unsigned long)size, *pos); ++ ++ oldfs = get_fs(); ++ set_fs(KERNEL_DS); ++ do { ++ err = func(file, (char __user*)buf, size, pos); ++ } while (err == -EAGAIN || err == -EINTR); ++ set_fs(oldfs); ++ ++#if 0 ++ if (err > 0) ++ fsnotify_access(file->f_dentry); ++#endif ++ ++ TraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static ssize_t do_xino_fwrite(writef_t func, struct file *file, void *buf, ++ size_t size, loff_t *pos) ++{ ++ ssize_t err; ++ mm_segment_t oldfs; ++ ++ lockdep_off(); ++ oldfs = get_fs(); ++ set_fs(KERNEL_DS); ++ do { ++ err = func(file, (const char __user*)buf, size, pos); ++ } while (err == -EAGAIN || err == -EINTR); ++ set_fs(oldfs); ++ lockdep_on(); ++ ++#if 0 ++ if (err > 0) ++ fsnotify_modify(file->f_dentry); ++#endif ++ ++ TraceErr(err); ++ return err; ++} ++ ++struct do_xino_fwrite_args { ++ ssize_t *errp; ++ writef_t func; ++ struct file *file; ++ void *buf; ++ size_t size; ++ loff_t *pos; ++}; ++ ++static void call_do_xino_fwrite(void *args) ++{ ++ struct do_xino_fwrite_args *a = args; ++ *a->errp = do_xino_fwrite(a->func, a->file, a->buf, a->size, a->pos); ++} ++ ++static ssize_t xino_fwrite(writef_t func, struct file *file, void *buf, ++ size_t size, loff_t *pos) ++{ ++ ssize_t err; ++ ++ LKTRTrace("%.*s, sz %lu, *pos %Ld\n", ++ DLNPair(file->f_dentry), (unsigned long)size, *pos); ++ ++ // signal block and no wkq? ++ /* ++ * it breaks RLIMIT_FSIZE and normal user's limit, ++ * users should care about quota and real 'filesystem full.' ++ */ ++ if (!is_au_wkq(current)) { ++ struct do_xino_fwrite_args args = { ++ .errp = &err, ++ .func = func, ++ .file = file, ++ .buf = buf, ++ .size = size, ++ .pos = pos ++ }; ++ au_wkq_wait(call_do_xino_fwrite, &args, /*dlgt*/0); ++ } else ++ err = do_xino_fwrite(func, file, buf, size, pos); ++ ++ TraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ++ * write @ino to the xinofile for the specified branch{@sb, @bindex} ++ * at the position of @_ino. ++ * when @ino is zero, it is written to the xinofile and means no entry. 
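++ * each slot is sizeof(struct xino) bytes, so the entry for @h_ino is
++ * written at file offset h_ino * sizeof(struct xino).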
++ */ ++int xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, ++ struct xino *xino) ++{ ++ struct aufs_branch *br; ++ loff_t pos; ++ ssize_t sz; ++ ++ LKTRTrace("b%d, hi%lu, i%lu\n", bindex, h_ino, xino->ino); ++ //DEBUG_ON(!xino->ino /* || !xino->h_gen */); ++ //WARN_ON(bindex == 0 && h_ino == 31); ++ ++ if (unlikely(!au_flag_test(sb, AuFlag_XINO))) ++ return 0; ++ ++ br = stobr(sb, bindex); ++ DEBUG_ON(!br || !br->br_xino); ++ pos = h_ino * sizeof(*xino); ++ sz = xino_fwrite(br->br_xino_write, br->br_xino, xino, sizeof(*xino), ++ &pos); ++ //if (LktrCond) sz = 1; ++ if (sz == sizeof(*xino)) ++ return 0; /* success */ ++ ++ IOErr("write failed (%ld)\n", (long)sz); ++ return -EIO; ++} ++ ++int xino_write0(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino) ++{ ++ struct xino xino = { ++ .ino = 0 ++ }; ++ return xino_write(sb, bindex, h_ino, &xino); ++} ++ ++// why is not atomic_long_inc_return defined? ++static DEFINE_SPINLOCK(alir_lock); ++static long atomic_long_inc_return(atomic_long_t *a) ++{ ++ long l; ++ ++ spin_lock(&alir_lock); ++ atomic_long_inc(a); ++ l = atomic_long_read(a); ++ spin_unlock(&alir_lock); ++ return l; ++} ++ ++ino_t xino_new_ino(struct super_block *sb) ++{ ++ ino_t ino; ++ ++ TraceEnter(); ++ ino = atomic_long_inc_return(&stosi(sb)->si_xino); ++ BUILD_BUG_ON(AUFS_FIRST_INO < AUFS_ROOT_INO); ++ if (ino >= AUFS_ROOT_INO) ++ return ino; ++ else { ++ atomic_long_dec(&stosi(sb)->si_xino); ++ IOErr1("inode number overflow\n"); ++ return 0; ++ } ++} ++ ++/* ++ * read @ino from xinofile for the specified branch{@sb, @bindex} ++ * at the position of @h_ino. ++ * if @ino does not exist and @do_new is true, get new one. ++ */ ++int xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino, ++ struct xino *xino) ++{ ++ int err; ++ struct aufs_branch *br; ++ struct file *file; ++ loff_t pos; ++ ssize_t sz; ++ ++ LKTRTrace("b%d, hi%lu\n", bindex, h_ino); ++ ++ err = 0; ++ xino->ino = 0; ++ if (unlikely(!au_flag_test(sb, AuFlag_XINO))) ++ return 0; /* no ino */ ++ ++ br = stobr(sb, bindex); ++ file = br->br_xino; ++ DEBUG_ON(!file); ++ pos = h_ino * sizeof(*xino); ++ if (i_size_read(file->f_dentry->d_inode) < pos + sizeof(*xino)) ++ return 0; /* no ino */ ++ ++ sz = xino_fread(br->br_xino_read, file, xino, sizeof(*xino), &pos); ++ if (sz == sizeof(*xino)) ++ return 0; /* success */ ++ ++ err = sz; ++ if (unlikely(sz >= 0)) { ++ err = -EIO; ++ IOErr("xino read error (%ld)\n", (long)sz); ++ } ++ ++ TraceErr(err); ++ return err; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++struct file *xino_create(struct super_block *sb, char *fname, int silent, ++ struct dentry *parent) ++{ ++ struct file *file; ++ int err; ++ struct dentry *hidden_parent; ++ struct inode *hidden_dir; ++ //const int udba = au_flag_test(sb, AuFlag_UDBA_INOTIFY); ++ ++ LKTRTrace("%s\n", fname); ++ //DEBUG_ON(!au_flag_test(sb, AuFlag_XINO)); ++ ++ // LSM may detect it ++ // use sio? 
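++	// create-then-unlink: the file is opened O_CREAT | O_EXCL below and
++	// unlinked right away, so only this open struct file keeps it alive
++	// and it does not stay visible in the namespace.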
++ file = vfsub_filp_open(fname, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE, ++ S_IRUGO | S_IWUGO); ++ //file = ERR_PTR(-1); ++ if (IS_ERR(file)) { ++ if (!silent) ++ Err("open %s(%ld)\n", fname, PTR_ERR(file)); ++ return file; ++ } ++#if 0 ++ if (unlikely(udba && parent)) ++ au_direval_dec(parent); ++#endif ++ ++ /* keep file count */ ++ hidden_parent = dget_parent(file->f_dentry); ++ hidden_dir = hidden_parent->d_inode; ++ hi_lock_parent(hidden_dir); ++ err = vfsub_unlink(hidden_dir, file->f_dentry, /*dlgt*/0); ++#if 0 ++ if (unlikely(!err && udba && parent)) ++ au_direval_dec(parent); ++#endif ++ i_unlock(hidden_dir); ++ dput(hidden_parent); ++ if (unlikely(err)) { ++ if (!silent) ++ Err("unlink %s(%d)\n", fname, err); ++ goto out; ++ } ++ if (sb != file->f_dentry->d_sb) ++ return file; /* success */ ++ ++ if (!silent) ++ Err("%s must be outside\n", fname); ++ err = -EINVAL; ++ ++ out: ++ fput(file); ++ file = ERR_PTR(err); ++ return file; ++} ++ ++/* ++ * find another branch who is on the same filesystem of the specified ++ * branch{@btgt}. search until @bend. ++ */ ++static int is_sb_shared(struct super_block *sb, aufs_bindex_t btgt, ++ aufs_bindex_t bend) ++{ ++ aufs_bindex_t bindex; ++ struct super_block *tgt_sb = sbr_sb(sb, btgt); ++ ++ for (bindex = 0; bindex <= bend; bindex++) ++ if (unlikely(btgt != bindex && tgt_sb == sbr_sb(sb, bindex))) ++ return bindex; ++ return -1; ++} ++ ++/* ++ * create a new xinofile at the same place/path as @base_file. ++ */ ++static struct file *xino_create2(struct file *base_file) ++{ ++ struct file *file; ++ int err; ++ struct dentry *base, *dentry, *parent; ++ struct inode *dir; ++ struct qstr *name; ++ struct lkup_args lkup = { ++ .nfsmnt = NULL, ++ .dlgt = 0 ++ }; ++ ++ base = base_file->f_dentry; ++ LKTRTrace("%.*s\n", DLNPair(base)); ++ parent = dget_parent(base); ++ dir = parent->d_inode; ++ IMustLock(dir); ++ ++ file = ERR_PTR(-EINVAL); ++ if (unlikely(au_is_nfs(parent->d_sb))) ++ goto out; ++ ++ // do not superio, nor NFS. ++ name = &base->d_name; ++ dentry = lkup_one(name->name, parent, name->len, &lkup); ++ //if (LktrCond) {dput(dentry); dentry = ERR_PTR(-1);} ++ if (IS_ERR(dentry)) { ++ file = (void*)dentry; ++ Err("%.*s lookup err %ld\n", LNPair(name), PTR_ERR(dentry)); ++ goto out; ++ } ++ err = vfsub_create(dir, dentry, S_IRUGO | S_IWUGO, NULL, /*dlgt*/0); ++ //if (LktrCond) {vfs_unlink(dir, dentry); err = -1;} ++ if (unlikely(err)) { ++ file = ERR_PTR(err); ++ Err("%.*s create err %d\n", LNPair(name), err); ++ goto out_dput; ++ } ++ file = dentry_open(dget(dentry), mntget(base_file->f_vfsmnt), ++ O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE); ++ //if (LktrCond) {fput(file); file = ERR_PTR(-1);} ++ if (IS_ERR(file)) { ++ Err("%.*s open err %ld\n", LNPair(name), PTR_ERR(file)); ++ goto out_dput; ++ } ++ err = vfsub_unlink(dir, dentry, /*dlgt*/0); ++ //if (LktrCond) err = -1; ++ if (!err) ++ goto out_dput; /* success */ ++ ++ Err("%.*s unlink err %d\n", LNPair(name), err); ++ fput(file); ++ file = ERR_PTR(err); ++ ++ out_dput: ++ dput(dentry); ++ out: ++ dput(parent); ++ TraceErrPtr(file); ++ return file; ++} ++ ++/* ++ * initialize the xinofile for the specified branch{@sb, @bindex} ++ * at the place/path where @base_file indicates. ++ * test whether another branch is on the same filesystem or not, ++ * if @do_test is true. 
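++ * when another branch already lives on the same filesystem
++ * (is_sb_shared()), its xino file is reused via get_file() instead of
++ * creating a new one.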
++ */ ++int xino_init(struct super_block *sb, aufs_bindex_t bindex, ++ struct file *base_file, int do_test) ++{ ++ int err; ++ struct aufs_branch *br; ++ aufs_bindex_t bshared, bend; ++ struct file *file; ++ struct inode *inode, *hidden_inode; ++ struct xino xino; ++ ++ LKTRTrace("b%d, base_file %p, do_test %d\n", ++ bindex, base_file, do_test); ++ SiMustWriteLock(sb); ++ DEBUG_ON(!au_flag_test(sb, AuFlag_XINO)); ++ br = stobr(sb, bindex); ++ DEBUG_ON(br->br_xino); ++ ++ file = NULL; ++ bshared = -1; ++ bend = sbend(sb); ++ if (do_test) ++ bshared = is_sb_shared(sb, bindex, bend); ++ if (unlikely(bshared >= 0)) { ++ struct aufs_branch *shared_br = stobr(sb, bshared); ++ if (shared_br->br_xino) { ++ file = shared_br->br_xino; ++ get_file(file); ++ } ++ } ++ ++ if (!file) { ++ struct dentry *parent = dget_parent(base_file->f_dentry); ++ struct inode *dir = parent->d_inode; ++ ++ hi_lock_parent(dir); ++ file = xino_create2(base_file); ++ //if (LktrCond) {fput(file); file = ERR_PTR(-1);} ++ i_unlock(dir); ++ dput(parent); ++ err = PTR_ERR(file); ++ if (IS_ERR(file)) ++ goto out; ++ } ++ br->br_xino_read = find_readf(file); ++ err = PTR_ERR(br->br_xino_read); ++ if (IS_ERR(br->br_xino_read)) ++ goto out_put; ++ br->br_xino_write = find_writef(file); ++ err = PTR_ERR(br->br_xino_write); ++ if (IS_ERR(br->br_xino_write)) ++ goto out_put; ++ br->br_xino = file; ++ ++ inode = sb->s_root->d_inode; ++ hidden_inode = au_h_iptr_i(inode, bindex); ++ xino.ino = inode->i_ino; ++ //xino.h_gen = hidden_inode->i_generation; ++ //WARN_ON(xino.h_gen == AuXino_INVALID_HGEN); ++ err = xino_write(sb, bindex, hidden_inode->i_ino, &xino); ++ //if (LktrCond) err = -1; ++ if (!err) ++ return 0; /* success */ ++ ++ br->br_xino = NULL; ++ ++ out_put: ++ fput(file); ++ out: ++ TraceErr(err); ++ return err; ++} ++ ++/* ++ * set xino mount option. 
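++ * on remount with an unchanged path this is a no-op; otherwise each
++ * branch's existing xino file is re-created at the newly given path
++ * (xino_create2() plus au_copy_file()), missing ones are set up by
++ * xino_init(), and any failure clears AuFlag_XINO again (noxino).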
++ */ ++int xino_set(struct super_block *sb, struct opt_xino *xino, int remount) ++{ ++ int err, sparse; ++ aufs_bindex_t bindex, bend; ++ struct aufs_branch *br; ++ struct dentry *parent; ++ struct qstr *name; ++ struct file *cur_xino; ++ struct inode *dir; ++ ++ LKTRTrace("%s\n", xino->path); ++ ++ err = 0; ++ name = &xino->file->f_dentry->d_name; ++ parent = dget_parent(xino->file->f_dentry); ++ dir = parent->d_inode; ++ cur_xino = stobr(sb, 0)->br_xino; ++ if (remount ++ && cur_xino ++ && cur_xino->f_dentry->d_parent == parent ++ && name->len == cur_xino->f_dentry->d_name.len ++ && !memcmp(name->name, cur_xino->f_dentry->d_name.name, name->len)) ++ goto out; ++ ++ au_flag_set(sb, AuFlag_XINO); ++ bend = sbend(sb); ++ for (bindex = bend; bindex >= 0; bindex--) { ++ br = stobr(sb, bindex); ++ if (unlikely(br->br_xino && file_count(br->br_xino) > 1)) { ++ fput(br->br_xino); ++ br->br_xino = NULL; ++ } ++ } ++ ++ for (bindex = 0; bindex <= bend; bindex++) { ++ struct file *file; ++ struct inode *inode; ++ ++ br = stobr(sb, bindex); ++ if (unlikely(!br->br_xino)) ++ continue; ++ ++ DEBUG_ON(file_count(br->br_xino) != 1); ++ hi_lock_parent(dir); ++ file = xino_create2(xino->file); ++ //if (LktrCond) {fput(file); file = ERR_PTR(-1);} ++ err = PTR_ERR(file); ++ if (IS_ERR(file)) { ++ i_unlock(dir); ++ break; ++ } ++ inode = br->br_xino->f_dentry->d_inode; ++ err = au_copy_file(file, br->br_xino, i_size_read(inode), sb, ++ &sparse); ++ //if (LktrCond) err = -1; ++ i_unlock(dir); ++ if (unlikely(err)) { ++ fput(file); ++ break; ++ } ++ fput(br->br_xino); ++ br->br_xino = file; ++ br->br_xino_read = find_readf(file); ++ DEBUG_ON(IS_ERR(br->br_xino_read)); ++ br->br_xino_write = find_writef(file); ++ DEBUG_ON(IS_ERR(br->br_xino_write)); ++ } ++ ++ for (bindex = 0; bindex <= bend; bindex++) ++ if (unlikely(!stobr(sb, bindex)->br_xino)) { ++ err = xino_init(sb, bindex, xino->file, /*do_test*/1); ++ //if (LktrCond) {fput(stobr(sb, bindex)->br_xino); ++ //stobr(sb, bindex)->br_xino = NULL; err = -1;} ++ if (!err) ++ continue; ++ IOErr("creating xino for branch %d(%d), " ++ "forcing noxino\n", bindex, err); ++ err = -EIO; ++ break; ++ } ++ out: ++ dput(parent); ++ if (!err) ++ au_flag_set(sb, AuFlag_XINO); ++ else ++ au_flag_clr(sb, AuFlag_XINO); ++ TraceErr(err); ++ return err; ++} ++ ++/* ++ * clear xino mount option ++ */ ++int xino_clr(struct super_block *sb) ++{ ++ aufs_bindex_t bindex, bend; ++ ++ TraceEnter(); ++ SiMustWriteLock(sb); ++ ++ bend = sbend(sb); ++ for (bindex = 0; bindex <= bend; bindex++) { ++ struct aufs_branch *br; ++ br = stobr(sb, bindex); ++ if (br->br_xino) { ++ fput(br->br_xino); ++ br->br_xino = NULL; ++ } ++ } ++ ++ //todo: need to make iunique() to return the larger inode number ++ ++ au_flag_clr(sb, AuFlag_XINO); ++ return 0; ++} ++ ++/* ++ * create a xinofile at the default place/path. 
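++ * the first writable non-NFS branch is chosen and AUFS_XINO_FNAME is
++ * created under its root; when no such branch exists, AUFS_XINO_DEFPATH
++ * is used instead.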
++ */ ++struct file *xino_def(struct super_block *sb) ++{ ++ struct file *file; ++ aufs_bindex_t bend, bindex, bwr; ++ char *page, *p; ++ ++ bend = sbend(sb); ++ bwr = -1; ++ for (bindex = 0; bindex <= bend; bindex++) ++ if (br_writable(sbr_perm(sb, bindex)) ++ && !au_is_nfs(au_h_dptr_i(sb->s_root, bindex)->d_sb)) { ++ bwr = bindex; ++ break; ++ } ++ ++ if (bwr != -1) { ++ // todo: rewrite with lkup_one() ++ file = ERR_PTR(-ENOMEM); ++ page = __getname(); ++ //if (LktrCond) {__putname(page); page = NULL;} ++ if (unlikely(!page)) ++ goto out; ++ p = d_path(au_h_dptr_i(sb->s_root, bwr), sbr_mnt(sb, bwr), page, ++ PATH_MAX - sizeof(AUFS_XINO_FNAME)); ++ //if (LktrCond) p = ERR_PTR(-1); ++ file = (void*)p; ++ if (p && !IS_ERR(p)) { ++ strcat(p, "/" AUFS_XINO_FNAME); ++ LKTRTrace("%s\n", p); ++ file = xino_create(sb, p, /*silent*/0, sb->s_root); ++ //if (LktrCond) {fput(file); file = ERR_PTR(-1);} ++ } ++ __putname(page); ++ } else { ++ file = xino_create(sb, AUFS_XINO_DEFPATH, /*silent*/0, ++ /*parent*/NULL); ++ //if (LktrCond) {fput(file); file = ERR_PTR(-1);} ++ } ++ ++ out: ++ TraceErrPtr(file); ++ return file; ++} +diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile +new file mode 100644 +index 0000000..1bc7b06 +--- /dev/null ++++ b/fs/squashfs/Makefile +@@ -0,0 +1,7 @@ ++# ++# Makefile for the linux squashfs routines. ++# ++ ++obj-$(CONFIG_SQUASHFS) += squashfs.o ++squashfs-y += inode.o ++squashfs-y += squashfs2_0.o +diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c +new file mode 100644 +index 0000000..895b699 +--- /dev/null ++++ b/fs/squashfs/inode.c +@@ -0,0 +1,2329 @@ ++/* ++ * Squashfs - a compressed read only filesystem for Linux ++ * ++ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007 ++ * Phillip Lougher <phillip@lougher.org.uk> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2, ++ * or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++ * ++ * inode.c ++ */ ++ ++#include <linux/squashfs_fs.h> ++#include <linux/module.h> ++#include <linux/zlib.h> ++#include <linux/fs.h> ++#include <linux/squashfs_fs_sb.h> ++#include <linux/squashfs_fs_i.h> ++#include <linux/buffer_head.h> ++#include <linux/vfs.h> ++#include <linux/vmalloc.h> ++#include <linux/smp_lock.h> ++ ++#include "squashfs.h" ++ ++static void vfs_read_inode(struct inode *i); ++static struct dentry *squashfs_get_parent(struct dentry *child); ++static int squashfs_read_inode(struct inode *i, squashfs_inode_t inode); ++static int squashfs_statfs(struct dentry *, struct kstatfs *); ++static int squashfs_symlink_readpage(struct file *file, struct page *page); ++static long long read_blocklist(struct inode *inode, int index, ++ int readahead_blks, char *block_list, ++ unsigned short **block_p, unsigned int *bsize); ++static int squashfs_readpage(struct file *file, struct page *page); ++static int squashfs_readpage4K(struct file *file, struct page *page); ++static int squashfs_readdir(struct file *, void *, filldir_t); ++static struct dentry *squashfs_lookup(struct inode *, struct dentry *, ++ struct nameidata *); ++static int squashfs_remount(struct super_block *s, int *flags, char *data); ++static void squashfs_put_super(struct super_block *); ++static int squashfs_get_sb(struct file_system_type *,int, const char *, void *, ++ struct vfsmount *); ++static struct inode *squashfs_alloc_inode(struct super_block *sb); ++static void squashfs_destroy_inode(struct inode *inode); ++static int init_inodecache(void); ++static void destroy_inodecache(void); ++ ++static struct file_system_type squashfs_fs_type = { ++ .owner = THIS_MODULE, ++ .name = "squashfs", ++ .get_sb = squashfs_get_sb, ++ .kill_sb = kill_block_super, ++ .fs_flags = FS_REQUIRES_DEV ++}; ++ ++static const unsigned char squashfs_filetype_table[] = { ++ DT_UNKNOWN, DT_DIR, DT_REG, DT_LNK, DT_BLK, DT_CHR, DT_FIFO, DT_SOCK ++}; ++ ++static struct super_operations squashfs_super_ops = { ++ .alloc_inode = squashfs_alloc_inode, ++ .destroy_inode = squashfs_destroy_inode, ++ .statfs = squashfs_statfs, ++ .put_super = squashfs_put_super, ++ .remount_fs = squashfs_remount ++}; ++ ++static struct super_operations squashfs_export_super_ops = { ++ .alloc_inode = squashfs_alloc_inode, ++ .destroy_inode = squashfs_destroy_inode, ++ .statfs = squashfs_statfs, ++ .put_super = squashfs_put_super, ++ .read_inode = vfs_read_inode ++}; ++ ++static struct export_operations squashfs_export_ops = { ++ .get_parent = squashfs_get_parent ++}; ++ ++SQSH_EXTERN const struct address_space_operations squashfs_symlink_aops = { ++ .readpage = squashfs_symlink_readpage ++}; ++ ++SQSH_EXTERN const struct address_space_operations squashfs_aops = { ++ .readpage = squashfs_readpage ++}; ++ ++SQSH_EXTERN const struct address_space_operations squashfs_aops_4K = { ++ .readpage = squashfs_readpage4K ++}; ++ ++static const struct file_operations squashfs_dir_ops = { ++ .read = generic_read_dir, ++ .readdir = squashfs_readdir ++}; ++ ++SQSH_EXTERN struct inode_operations squashfs_dir_inode_ops = { ++ .lookup = squashfs_lookup ++}; ++ ++ ++static struct buffer_head *get_block_length(struct super_block *s, ++ int *cur_index, int *offset, int *c_byte) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ unsigned short temp; ++ struct buffer_head *bh; ++ ++ if (!(bh = sb_bread(s, *cur_index))) ++ goto out; ++ ++ if (msblk->devblksize - *offset == 1) { ++ if (msblk->swap) ++ ((unsigned char *) &temp)[1] = *((unsigned char *) ++ (bh->b_data + *offset)); ++ 
else ++ ((unsigned char *) &temp)[0] = *((unsigned char *) ++ (bh->b_data + *offset)); ++ brelse(bh); ++ if (!(bh = sb_bread(s, ++(*cur_index)))) ++ goto out; ++ if (msblk->swap) ++ ((unsigned char *) &temp)[0] = *((unsigned char *) ++ bh->b_data); ++ else ++ ((unsigned char *) &temp)[1] = *((unsigned char *) ++ bh->b_data); ++ *c_byte = temp; ++ *offset = 1; ++ } else { ++ if (msblk->swap) { ++ ((unsigned char *) &temp)[1] = *((unsigned char *) ++ (bh->b_data + *offset)); ++ ((unsigned char *) &temp)[0] = *((unsigned char *) ++ (bh->b_data + *offset + 1)); ++ } else { ++ ((unsigned char *) &temp)[0] = *((unsigned char *) ++ (bh->b_data + *offset)); ++ ((unsigned char *) &temp)[1] = *((unsigned char *) ++ (bh->b_data + *offset + 1)); ++ } ++ *c_byte = temp; ++ *offset += 2; ++ } ++ ++ if (SQUASHFS_CHECK_DATA(msblk->sblk.flags)) { ++ if (*offset == msblk->devblksize) { ++ brelse(bh); ++ if (!(bh = sb_bread(s, ++(*cur_index)))) ++ goto out; ++ *offset = 0; ++ } ++ if (*((unsigned char *) (bh->b_data + *offset)) != ++ SQUASHFS_MARKER_BYTE) { ++ ERROR("Metadata block marker corrupt @ %x\n", ++ *cur_index); ++ brelse(bh); ++ goto out; ++ } ++ (*offset)++; ++ } ++ return bh; ++ ++out: ++ return NULL; ++} ++ ++ ++SQSH_EXTERN unsigned int squashfs_read_data(struct super_block *s, char *buffer, ++ long long index, unsigned int length, ++ long long *next_index, int srclength) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ struct buffer_head *bh[((SQUASHFS_FILE_MAX_SIZE - 1) >> ++ msblk->devblksize_log2) + 2]; ++ unsigned int offset = index & ((1 << msblk->devblksize_log2) - 1); ++ unsigned int cur_index = index >> msblk->devblksize_log2; ++ int bytes, avail_bytes, b = 0, k = 0; ++ unsigned int compressed; ++ unsigned int c_byte = length; ++ ++ if (c_byte) { ++ bytes = msblk->devblksize - offset; ++ compressed = SQUASHFS_COMPRESSED_BLOCK(c_byte); ++ c_byte = SQUASHFS_COMPRESSED_SIZE_BLOCK(c_byte); ++ ++ TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n", index, compressed ++ ? "" : "un", (unsigned int) c_byte, srclength); ++ ++ if (c_byte > srclength || index < 0 || (index + c_byte) > sblk->bytes_used) ++ goto read_failure; ++ ++ if (!(bh[0] = sb_getblk(s, cur_index))) ++ goto block_release; ++ ++ for (b = 1; bytes < c_byte; b++) { ++ if (!(bh[b] = sb_getblk(s, ++cur_index))) ++ goto block_release; ++ bytes += msblk->devblksize; ++ } ++ ll_rw_block(READ, b, bh); ++ } else { ++ if (index < 0 || (index + 2) > sblk->bytes_used) ++ goto read_failure; ++ ++ if (!(bh[0] = get_block_length(s, &cur_index, &offset, ++ &c_byte))) ++ goto read_failure; ++ ++ bytes = msblk->devblksize - offset; ++ compressed = SQUASHFS_COMPRESSED(c_byte); ++ c_byte = SQUASHFS_COMPRESSED_SIZE(c_byte); ++ ++ TRACE("Block @ 0x%llx, %scompressed size %d\n", index, compressed ++ ? "" : "un", (unsigned int) c_byte); ++ ++ if (c_byte > srclength || (index + c_byte) > sblk->bytes_used) ++ goto read_failure; ++ ++ for (b = 1; bytes < c_byte; b++) { ++ if (!(bh[b] = sb_getblk(s, ++cur_index))) ++ goto block_release; ++ bytes += msblk->devblksize; ++ } ++ ll_rw_block(READ, b - 1, bh + 1); ++ } ++ ++ if (compressed) { ++ int zlib_err = 0; ++ ++ /* ++ * uncompress block ++ */ ++ ++ mutex_lock(&msblk->read_data_mutex); ++ ++ msblk->stream.next_out = buffer; ++ msblk->stream.avail_out = srclength; ++ ++ for (bytes = 0; k < b; k++) { ++ avail_bytes = (c_byte - bytes) > (msblk->devblksize - offset) ? 
++ msblk->devblksize - offset : ++ c_byte - bytes; ++ wait_on_buffer(bh[k]); ++ if (!buffer_uptodate(bh[k])) ++ goto release_mutex; ++ ++ msblk->stream.next_in = bh[k]->b_data + offset; ++ msblk->stream.avail_in = avail_bytes; ++ ++ if (k == 0) { ++ zlib_err = zlib_inflateInit(&msblk->stream); ++ if (zlib_err != Z_OK) { ++ ERROR("zlib_inflateInit returned unexpected result 0x%x, srclength %d\n", ++ zlib_err, srclength); ++ goto release_mutex; ++ } ++ ++ if (avail_bytes == 0) { ++ offset = 0; ++ brelse(bh[k]); ++ continue; ++ } ++ } ++ ++ zlib_err = zlib_inflate(&msblk->stream, Z_NO_FLUSH); ++ if (zlib_err != Z_OK && zlib_err != Z_STREAM_END) { ++ ERROR("zlib_inflate returned unexpected result 0x%x, srclength %d, avail_in %d, avail_out %d\n", ++ zlib_err, srclength, msblk->stream.avail_in, msblk->stream.avail_out); ++ goto release_mutex; ++ } ++ ++ bytes += avail_bytes; ++ offset = 0; ++ brelse(bh[k]); ++ } ++ ++ if (zlib_err != Z_STREAM_END) ++ goto release_mutex; ++ ++ zlib_err = zlib_inflateEnd(&msblk->stream); ++ if (zlib_err != Z_OK) { ++ ERROR("zlib_inflateEnd returned unexpected result 0x%x, srclength %d\n", ++ zlib_err, srclength); ++ goto release_mutex; ++ } ++ bytes = msblk->stream.total_out; ++ mutex_unlock(&msblk->read_data_mutex); ++ } else { ++ int i; ++ ++ for(i = 0; i < b; i++) { ++ wait_on_buffer(bh[i]); ++ if(!buffer_uptodate(bh[i])) ++ goto block_release; ++ } ++ ++ for (bytes = 0; k < b; k++) { ++ avail_bytes = (c_byte - bytes) > (msblk->devblksize - offset) ? ++ msblk->devblksize - offset : ++ c_byte - bytes; ++ memcpy(buffer + bytes, bh[k]->b_data + offset, avail_bytes); ++ bytes += avail_bytes; ++ offset = 0; ++ brelse(bh[k]); ++ } ++ } ++ ++ if (next_index) ++ *next_index = index + c_byte + (length ? 0 : ++ (SQUASHFS_CHECK_DATA(msblk->sblk.flags) ++ ? 
3 : 2)); ++ return bytes; ++ ++release_mutex: ++ mutex_unlock(&msblk->read_data_mutex); ++ ++block_release: ++ for (; k < b; k++) ++ brelse(bh[k]); ++ ++read_failure: ++ ERROR("sb_bread failed reading block 0x%x\n", cur_index); ++ return 0; ++} ++ ++ ++SQSH_EXTERN int squashfs_get_cached_block(struct super_block *s, char *buffer, ++ long long block, unsigned int offset, ++ int length, long long *next_block, ++ unsigned int *next_offset) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ int n, i, bytes, return_length = length; ++ long long next_index; ++ ++ TRACE("Entered squashfs_get_cached_block [%llx:%x]\n", block, offset); ++ ++ while ( 1 ) { ++ for (i = 0; i < SQUASHFS_CACHED_BLKS; i++) ++ if (msblk->block_cache[i].block == block) ++ break; ++ ++ mutex_lock(&msblk->block_cache_mutex); ++ ++ if (i == SQUASHFS_CACHED_BLKS) { ++ /* read inode header block */ ++ for (i = msblk->next_cache, n = SQUASHFS_CACHED_BLKS; ++ n ; n --, i = (i + 1) % ++ SQUASHFS_CACHED_BLKS) ++ if (msblk->block_cache[i].block != ++ SQUASHFS_USED_BLK) ++ break; ++ ++ if (n == 0) { ++ wait_queue_t wait; ++ ++ init_waitqueue_entry(&wait, current); ++ add_wait_queue(&msblk->waitq, &wait); ++ set_current_state(TASK_UNINTERRUPTIBLE); ++ mutex_unlock(&msblk->block_cache_mutex); ++ schedule(); ++ set_current_state(TASK_RUNNING); ++ remove_wait_queue(&msblk->waitq, &wait); ++ continue; ++ } ++ msblk->next_cache = (i + 1) % SQUASHFS_CACHED_BLKS; ++ ++ if (msblk->block_cache[i].block == ++ SQUASHFS_INVALID_BLK) { ++ if (!(msblk->block_cache[i].data = ++ kmalloc(SQUASHFS_METADATA_SIZE, ++ GFP_KERNEL))) { ++ ERROR("Failed to allocate cache" ++ "block\n"); ++ mutex_unlock(&msblk->block_cache_mutex); ++ goto out; ++ } ++ } ++ ++ msblk->block_cache[i].block = SQUASHFS_USED_BLK; ++ mutex_unlock(&msblk->block_cache_mutex); ++ ++ msblk->block_cache[i].length = squashfs_read_data(s, ++ msblk->block_cache[i].data, block, 0, &next_index, SQUASHFS_METADATA_SIZE); ++ if (msblk->block_cache[i].length == 0) { ++ ERROR("Unable to read cache block [%llx:%x]\n", ++ block, offset); ++ mutex_lock(&msblk->block_cache_mutex); ++ msblk->block_cache[i].block = SQUASHFS_INVALID_BLK; ++ kfree(msblk->block_cache[i].data); ++ wake_up(&msblk->waitq); ++ mutex_unlock(&msblk->block_cache_mutex); ++ goto out; ++ } ++ ++ mutex_lock(&msblk->block_cache_mutex); ++ wake_up(&msblk->waitq); ++ msblk->block_cache[i].block = block; ++ msblk->block_cache[i].next_index = next_index; ++ TRACE("Read cache block [%llx:%x]\n", block, offset); ++ } ++ ++ if (msblk->block_cache[i].block != block) { ++ mutex_unlock(&msblk->block_cache_mutex); ++ continue; ++ } ++ ++ bytes = msblk->block_cache[i].length - offset; ++ ++ if (bytes < 1) { ++ mutex_unlock(&msblk->block_cache_mutex); ++ goto out; ++ } else if (bytes >= length) { ++ if (buffer) ++ memcpy(buffer, msblk->block_cache[i].data + ++ offset, length); ++ if (msblk->block_cache[i].length - offset == length) { ++ *next_block = msblk->block_cache[i].next_index; ++ *next_offset = 0; ++ } else { ++ *next_block = block; ++ *next_offset = offset + length; ++ } ++ mutex_unlock(&msblk->block_cache_mutex); ++ goto finish; ++ } else { ++ if (buffer) { ++ memcpy(buffer, msblk->block_cache[i].data + ++ offset, bytes); ++ buffer += bytes; ++ } ++ block = msblk->block_cache[i].next_index; ++ mutex_unlock(&msblk->block_cache_mutex); ++ length -= bytes; ++ offset = 0; ++ } ++ } ++ ++finish: ++ return return_length; ++out: ++ return 0; ++} ++ ++ ++static int get_fragment_location(struct super_block *s, unsigned int fragment, ++ long 
long *fragment_start_block, ++ unsigned int *fragment_size) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ long long start_block = ++ msblk->fragment_index[SQUASHFS_FRAGMENT_INDEX(fragment)]; ++ int offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment); ++ struct squashfs_fragment_entry fragment_entry; ++ ++ if (msblk->swap) { ++ struct squashfs_fragment_entry sfragment_entry; ++ ++ if (!squashfs_get_cached_block(s, (char *) &sfragment_entry, ++ start_block, offset, ++ sizeof(sfragment_entry), &start_block, ++ &offset)) ++ goto out; ++ SQUASHFS_SWAP_FRAGMENT_ENTRY(&fragment_entry, &sfragment_entry); ++ } else ++ if (!squashfs_get_cached_block(s, (char *) &fragment_entry, ++ start_block, offset, ++ sizeof(fragment_entry), &start_block, ++ &offset)) ++ goto out; ++ ++ *fragment_start_block = fragment_entry.start_block; ++ *fragment_size = fragment_entry.size; ++ ++ return 1; ++ ++out: ++ return 0; ++} ++ ++ ++SQSH_EXTERN void release_cached_fragment(struct squashfs_sb_info *msblk, struct ++ squashfs_fragment_cache *fragment) ++{ ++ mutex_lock(&msblk->fragment_mutex); ++ fragment->locked --; ++ wake_up(&msblk->fragment_wait_queue); ++ mutex_unlock(&msblk->fragment_mutex); ++} ++ ++ ++SQSH_EXTERN struct squashfs_fragment_cache *get_cached_fragment(struct super_block ++ *s, long long start_block, ++ int length) ++{ ++ int i, n; ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ ++ while ( 1 ) { ++ mutex_lock(&msblk->fragment_mutex); ++ ++ for (i = 0; i < SQUASHFS_CACHED_FRAGMENTS && ++ msblk->fragment[i].block != start_block; i++); ++ ++ if (i == SQUASHFS_CACHED_FRAGMENTS) { ++ for (i = msblk->next_fragment, n = ++ SQUASHFS_CACHED_FRAGMENTS; n && ++ msblk->fragment[i].locked; n--, i = (i + 1) % ++ SQUASHFS_CACHED_FRAGMENTS); ++ ++ if (n == 0) { ++ wait_queue_t wait; ++ ++ init_waitqueue_entry(&wait, current); ++ add_wait_queue(&msblk->fragment_wait_queue, ++ &wait); ++ set_current_state(TASK_UNINTERRUPTIBLE); ++ mutex_unlock(&msblk->fragment_mutex); ++ schedule(); ++ set_current_state(TASK_RUNNING); ++ remove_wait_queue(&msblk->fragment_wait_queue, ++ &wait); ++ continue; ++ } ++ msblk->next_fragment = (msblk->next_fragment + 1) % ++ SQUASHFS_CACHED_FRAGMENTS; ++ ++ if (msblk->fragment[i].data == NULL) ++ if (!(msblk->fragment[i].data = SQUASHFS_ALLOC ++ (SQUASHFS_FILE_MAX_SIZE))) { ++ ERROR("Failed to allocate fragment " ++ "cache block\n"); ++ mutex_unlock(&msblk->fragment_mutex); ++ goto out; ++ } ++ ++ msblk->fragment[i].block = SQUASHFS_INVALID_BLK; ++ msblk->fragment[i].locked = 1; ++ mutex_unlock(&msblk->fragment_mutex); ++ ++ if (!(msblk->fragment[i].length = squashfs_read_data(s, ++ msblk->fragment[i].data, ++ start_block, length, NULL, sblk->block_size))) { ++ ERROR("Unable to read fragment cache block " ++ "[%llx]\n", start_block); ++ msblk->fragment[i].locked = 0; ++ smp_mb(); ++ goto out; ++ } ++ ++ mutex_lock(&msblk->fragment_mutex); ++ msblk->fragment[i].block = start_block; ++ TRACE("New fragment %d, start block %lld, locked %d\n", ++ i, msblk->fragment[i].block, ++ msblk->fragment[i].locked); ++ mutex_unlock(&msblk->fragment_mutex); ++ break; ++ } ++ ++ msblk->fragment[i].locked++; ++ mutex_unlock(&msblk->fragment_mutex); ++ TRACE("Got fragment %d, start block %lld, locked %d\n", i, ++ msblk->fragment[i].block, ++ msblk->fragment[i].locked); ++ break; ++ } ++ ++ return &msblk->fragment[i]; ++ ++out: ++ return NULL; ++} ++ ++ ++static void squashfs_new_inode(struct squashfs_sb_info *msblk, struct inode *i, ++ struct 
squashfs_base_inode_header *inodeb) ++{ ++ i->i_ino = inodeb->inode_number; ++ i->i_mtime.tv_sec = inodeb->mtime; ++ i->i_atime.tv_sec = inodeb->mtime; ++ i->i_ctime.tv_sec = inodeb->mtime; ++ i->i_uid = msblk->uid[inodeb->uid]; ++ i->i_mode = inodeb->mode; ++ i->i_size = 0; ++ if (inodeb->guid == SQUASHFS_GUIDS) ++ i->i_gid = i->i_uid; ++ else ++ i->i_gid = msblk->guid[inodeb->guid]; ++} ++ ++ ++static squashfs_inode_t squashfs_inode_lookup(struct super_block *s, int ino) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ long long start = msblk->inode_lookup_table[SQUASHFS_LOOKUP_BLOCK(ino - 1)]; ++ int offset = SQUASHFS_LOOKUP_BLOCK_OFFSET(ino - 1); ++ squashfs_inode_t inode; ++ ++ TRACE("Entered squashfs_inode_lookup, inode_number = %d\n", ino); ++ ++ if (msblk->swap) { ++ squashfs_inode_t sinode; ++ ++ if (!squashfs_get_cached_block(s, (char *) &sinode, start, offset, ++ sizeof(sinode), &start, &offset)) ++ goto out; ++ SQUASHFS_SWAP_INODE_T((&inode), &sinode); ++ } else if (!squashfs_get_cached_block(s, (char *) &inode, start, offset, ++ sizeof(inode), &start, &offset)) ++ goto out; ++ ++ TRACE("squashfs_inode_lookup, inode = 0x%llx\n", inode); ++ ++ return inode; ++ ++out: ++ return SQUASHFS_INVALID_BLK; ++} ++ ++ ++static void vfs_read_inode(struct inode *i) ++{ ++ struct squashfs_sb_info *msblk = i->i_sb->s_fs_info; ++ squashfs_inode_t inode = squashfs_inode_lookup(i->i_sb, i->i_ino); ++ ++ TRACE("Entered vfs_read_inode\n"); ++ ++ if(inode != SQUASHFS_INVALID_BLK) ++ (msblk->read_inode)(i, inode); ++} ++ ++ ++static struct dentry *squashfs_get_parent(struct dentry *child) ++{ ++ struct inode *i = child->d_inode; ++ struct inode *parent = iget(i->i_sb, SQUASHFS_I(i)->u.s2.parent_inode); ++ struct dentry *rv; ++ ++ TRACE("Entered squashfs_get_parent\n"); ++ ++ if(parent == NULL) { ++ rv = ERR_PTR(-EACCES); ++ goto out; ++ } ++ ++ rv = d_alloc_anon(parent); ++ if(rv == NULL) ++ rv = ERR_PTR(-ENOMEM); ++ ++out: ++ return rv; ++} ++ ++ ++SQSH_EXTERN struct inode *squashfs_iget(struct super_block *s, squashfs_inode_t inode, unsigned int inode_number) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ struct inode *i = iget_locked(s, inode_number); ++ ++ TRACE("Entered squashfs_iget\n"); ++ ++ if(i && (i->i_state & I_NEW)) { ++ (msblk->read_inode)(i, inode); ++ unlock_new_inode(i); ++ } ++ ++ return i; ++} ++ ++ ++static int squashfs_read_inode(struct inode *i, squashfs_inode_t inode) ++{ ++ struct super_block *s = i->i_sb; ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ long long block = SQUASHFS_INODE_BLK(inode) + ++ sblk->inode_table_start; ++ unsigned int offset = SQUASHFS_INODE_OFFSET(inode); ++ long long next_block; ++ unsigned int next_offset; ++ union squashfs_inode_header id, sid; ++ struct squashfs_base_inode_header *inodeb = &id.base, ++ *sinodeb = &sid.base; ++ ++ TRACE("Entered squashfs_read_inode\n"); ++ ++ if (msblk->swap) { ++ if (!squashfs_get_cached_block(s, (char *) sinodeb, block, ++ offset, sizeof(*sinodeb), &next_block, ++ &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_BASE_INODE_HEADER(inodeb, sinodeb, ++ sizeof(*sinodeb)); ++ } else ++ if (!squashfs_get_cached_block(s, (char *) inodeb, block, ++ offset, sizeof(*inodeb), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ squashfs_new_inode(msblk, i, inodeb); ++ ++ switch(inodeb->inode_type) { ++ case SQUASHFS_FILE_TYPE: { ++ unsigned int frag_size; ++ long long frag_blk; ++ struct squashfs_reg_inode_header *inodep = &id.reg; ++ 
struct squashfs_reg_inode_header *sinodep = &sid.reg; ++ ++ if (msblk->swap) { ++ if (!squashfs_get_cached_block(s, (char *) ++ sinodep, block, offset, ++ sizeof(*sinodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_REG_INODE_HEADER(inodep, sinodep); ++ } else ++ if (!squashfs_get_cached_block(s, (char *) ++ inodep, block, offset, ++ sizeof(*inodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ frag_blk = SQUASHFS_INVALID_BLK; ++ if (inodep->fragment != SQUASHFS_INVALID_FRAG && ++ !get_fragment_location(s, ++ inodep->fragment, &frag_blk, &frag_size)) ++ goto failed_read; ++ ++ i->i_nlink = 1; ++ i->i_size = inodep->file_size; ++ i->i_fop = &generic_ro_fops; ++ i->i_mode |= S_IFREG; ++ i->i_blocks = ((i->i_size - 1) >> 9) + 1; ++ SQUASHFS_I(i)->u.s1.fragment_start_block = frag_blk; ++ SQUASHFS_I(i)->u.s1.fragment_size = frag_size; ++ SQUASHFS_I(i)->u.s1.fragment_offset = inodep->offset; ++ SQUASHFS_I(i)->start_block = inodep->start_block; ++ SQUASHFS_I(i)->u.s1.block_list_start = next_block; ++ SQUASHFS_I(i)->offset = next_offset; ++ if (sblk->block_size > 4096) ++ i->i_data.a_ops = &squashfs_aops; ++ else ++ i->i_data.a_ops = &squashfs_aops_4K; ++ ++ TRACE("File inode %x:%x, start_block %llx, " ++ "block_list_start %llx, offset %x\n", ++ SQUASHFS_INODE_BLK(inode), offset, ++ inodep->start_block, next_block, ++ next_offset); ++ break; ++ } ++ case SQUASHFS_LREG_TYPE: { ++ unsigned int frag_size; ++ long long frag_blk; ++ struct squashfs_lreg_inode_header *inodep = &id.lreg; ++ struct squashfs_lreg_inode_header *sinodep = &sid.lreg; ++ ++ if (msblk->swap) { ++ if (!squashfs_get_cached_block(s, (char *) ++ sinodep, block, offset, ++ sizeof(*sinodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_LREG_INODE_HEADER(inodep, sinodep); ++ } else ++ if (!squashfs_get_cached_block(s, (char *) ++ inodep, block, offset, ++ sizeof(*inodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ frag_blk = SQUASHFS_INVALID_BLK; ++ if (inodep->fragment != SQUASHFS_INVALID_FRAG && ++ !get_fragment_location(s, ++ inodep->fragment, &frag_blk, &frag_size)) ++ goto failed_read; ++ ++ i->i_nlink = inodep->nlink; ++ i->i_size = inodep->file_size; ++ i->i_fop = &generic_ro_fops; ++ i->i_mode |= S_IFREG; ++ i->i_blocks = ((i->i_size - 1) >> 9) + 1; ++ SQUASHFS_I(i)->u.s1.fragment_start_block = frag_blk; ++ SQUASHFS_I(i)->u.s1.fragment_size = frag_size; ++ SQUASHFS_I(i)->u.s1.fragment_offset = inodep->offset; ++ SQUASHFS_I(i)->start_block = inodep->start_block; ++ SQUASHFS_I(i)->u.s1.block_list_start = next_block; ++ SQUASHFS_I(i)->offset = next_offset; ++ if (sblk->block_size > 4096) ++ i->i_data.a_ops = &squashfs_aops; ++ else ++ i->i_data.a_ops = &squashfs_aops_4K; ++ ++ TRACE("File inode %x:%x, start_block %llx, " ++ "block_list_start %llx, offset %x\n", ++ SQUASHFS_INODE_BLK(inode), offset, ++ inodep->start_block, next_block, ++ next_offset); ++ break; ++ } ++ case SQUASHFS_DIR_TYPE: { ++ struct squashfs_dir_inode_header *inodep = &id.dir; ++ struct squashfs_dir_inode_header *sinodep = &sid.dir; ++ ++ if (msblk->swap) { ++ if (!squashfs_get_cached_block(s, (char *) ++ sinodep, block, offset, ++ sizeof(*sinodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_DIR_INODE_HEADER(inodep, sinodep); ++ } else ++ if (!squashfs_get_cached_block(s, (char *) ++ inodep, block, offset, ++ sizeof(*inodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ i->i_nlink = inodep->nlink; ++ i->i_size = inodep->file_size; ++ i->i_op = 
&squashfs_dir_inode_ops; ++ i->i_fop = &squashfs_dir_ops; ++ i->i_mode |= S_IFDIR; ++ SQUASHFS_I(i)->start_block = inodep->start_block; ++ SQUASHFS_I(i)->offset = inodep->offset; ++ SQUASHFS_I(i)->u.s2.directory_index_count = 0; ++ SQUASHFS_I(i)->u.s2.parent_inode = inodep->parent_inode; ++ ++ TRACE("Directory inode %x:%x, start_block %x, offset " ++ "%x\n", SQUASHFS_INODE_BLK(inode), ++ offset, inodep->start_block, ++ inodep->offset); ++ break; ++ } ++ case SQUASHFS_LDIR_TYPE: { ++ struct squashfs_ldir_inode_header *inodep = &id.ldir; ++ struct squashfs_ldir_inode_header *sinodep = &sid.ldir; ++ ++ if (msblk->swap) { ++ if (!squashfs_get_cached_block(s, (char *) ++ sinodep, block, offset, ++ sizeof(*sinodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_LDIR_INODE_HEADER(inodep, ++ sinodep); ++ } else ++ if (!squashfs_get_cached_block(s, (char *) ++ inodep, block, offset, ++ sizeof(*inodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ i->i_nlink = inodep->nlink; ++ i->i_size = inodep->file_size; ++ i->i_op = &squashfs_dir_inode_ops; ++ i->i_fop = &squashfs_dir_ops; ++ i->i_mode |= S_IFDIR; ++ SQUASHFS_I(i)->start_block = inodep->start_block; ++ SQUASHFS_I(i)->offset = inodep->offset; ++ SQUASHFS_I(i)->u.s2.directory_index_start = next_block; ++ SQUASHFS_I(i)->u.s2.directory_index_offset = ++ next_offset; ++ SQUASHFS_I(i)->u.s2.directory_index_count = ++ inodep->i_count; ++ SQUASHFS_I(i)->u.s2.parent_inode = inodep->parent_inode; ++ ++ TRACE("Long directory inode %x:%x, start_block %x, " ++ "offset %x\n", ++ SQUASHFS_INODE_BLK(inode), offset, ++ inodep->start_block, inodep->offset); ++ break; ++ } ++ case SQUASHFS_SYMLINK_TYPE: { ++ struct squashfs_symlink_inode_header *inodep = ++ &id.symlink; ++ struct squashfs_symlink_inode_header *sinodep = ++ &sid.symlink; ++ ++ if (msblk->swap) { ++ if (!squashfs_get_cached_block(s, (char *) ++ sinodep, block, offset, ++ sizeof(*sinodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_SYMLINK_INODE_HEADER(inodep, ++ sinodep); ++ } else ++ if (!squashfs_get_cached_block(s, (char *) ++ inodep, block, offset, ++ sizeof(*inodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ i->i_nlink = inodep->nlink; ++ i->i_size = inodep->symlink_size; ++ i->i_op = &page_symlink_inode_operations; ++ i->i_data.a_ops = &squashfs_symlink_aops; ++ i->i_mode |= S_IFLNK; ++ SQUASHFS_I(i)->start_block = next_block; ++ SQUASHFS_I(i)->offset = next_offset; ++ ++ TRACE("Symbolic link inode %x:%x, start_block %llx, " ++ "offset %x\n", ++ SQUASHFS_INODE_BLK(inode), offset, ++ next_block, next_offset); ++ break; ++ } ++ case SQUASHFS_BLKDEV_TYPE: ++ case SQUASHFS_CHRDEV_TYPE: { ++ struct squashfs_dev_inode_header *inodep = &id.dev; ++ struct squashfs_dev_inode_header *sinodep = &sid.dev; ++ ++ if (msblk->swap) { ++ if (!squashfs_get_cached_block(s, (char *) ++ sinodep, block, offset, ++ sizeof(*sinodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_DEV_INODE_HEADER(inodep, sinodep); ++ } else ++ if (!squashfs_get_cached_block(s, (char *) ++ inodep, block, offset, ++ sizeof(*inodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ i->i_nlink = inodep->nlink; ++ i->i_mode |= (inodeb->inode_type == ++ SQUASHFS_CHRDEV_TYPE) ? 
S_IFCHR : ++ S_IFBLK; ++ init_special_inode(i, i->i_mode, ++ old_decode_dev(inodep->rdev)); ++ ++ TRACE("Device inode %x:%x, rdev %x\n", ++ SQUASHFS_INODE_BLK(inode), offset, ++ inodep->rdev); ++ break; ++ } ++ case SQUASHFS_FIFO_TYPE: ++ case SQUASHFS_SOCKET_TYPE: { ++ struct squashfs_ipc_inode_header *inodep = &id.ipc; ++ struct squashfs_ipc_inode_header *sinodep = &sid.ipc; ++ ++ if (msblk->swap) { ++ if (!squashfs_get_cached_block(s, (char *) ++ sinodep, block, offset, ++ sizeof(*sinodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_IPC_INODE_HEADER(inodep, sinodep); ++ } else ++ if (!squashfs_get_cached_block(s, (char *) ++ inodep, block, offset, ++ sizeof(*inodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ i->i_nlink = inodep->nlink; ++ i->i_mode |= (inodeb->inode_type == SQUASHFS_FIFO_TYPE) ++ ? S_IFIFO : S_IFSOCK; ++ init_special_inode(i, i->i_mode, 0); ++ break; ++ } ++ default: ++ ERROR("Unknown inode type %d in squashfs_iget!\n", ++ inodeb->inode_type); ++ goto failed_read1; ++ } ++ ++ return 1; ++ ++failed_read: ++ ERROR("Unable to read inode [%llx:%x]\n", block, offset); ++ ++failed_read1: ++ make_bad_inode(i); ++ return 0; ++} ++ ++ ++static int read_inode_lookup_table(struct super_block *s) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ unsigned int length = SQUASHFS_LOOKUP_BLOCK_BYTES(sblk->inodes); ++ ++ TRACE("In read_inode_lookup_table, length %d\n", length); ++ ++ /* Allocate inode lookup table */ ++ if (!(msblk->inode_lookup_table = kmalloc(length, GFP_KERNEL))) { ++ ERROR("Failed to allocate inode lookup table\n"); ++ return 0; ++ } ++ ++ if (!squashfs_read_data(s, (char *) msblk->inode_lookup_table, ++ sblk->lookup_table_start, length | ++ SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length)) { ++ ERROR("unable to read inode lookup table\n"); ++ return 0; ++ } ++ ++ if (msblk->swap) { ++ int i; ++ long long block; ++ ++ for (i = 0; i < SQUASHFS_LOOKUP_BLOCKS(sblk->inodes); i++) { ++ SQUASHFS_SWAP_LOOKUP_BLOCKS((&block), ++ &msblk->inode_lookup_table[i], 1); ++ msblk->inode_lookup_table[i] = block; ++ } ++ } ++ ++ return 1; ++} ++ ++ ++static int read_fragment_index_table(struct super_block *s) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ unsigned int length = SQUASHFS_FRAGMENT_INDEX_BYTES(sblk->fragments); ++ ++ if(length == 0) ++ return 1; ++ ++ /* Allocate fragment index table */ ++ if (!(msblk->fragment_index = kmalloc(length, GFP_KERNEL))) { ++ ERROR("Failed to allocate fragment index table\n"); ++ return 0; ++ } ++ ++ if (!squashfs_read_data(s, (char *) msblk->fragment_index, ++ sblk->fragment_table_start, length | ++ SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length)) { ++ ERROR("unable to read fragment index table\n"); ++ return 0; ++ } ++ ++ if (msblk->swap) { ++ int i; ++ long long fragment; ++ ++ for (i = 0; i < SQUASHFS_FRAGMENT_INDEXES(sblk->fragments); i++) { ++ SQUASHFS_SWAP_FRAGMENT_INDEXES((&fragment), ++ &msblk->fragment_index[i], 1); ++ msblk->fragment_index[i] = fragment; ++ } ++ } ++ ++ return 1; ++} ++ ++ ++static int supported_squashfs_filesystem(struct squashfs_sb_info *msblk, int silent) ++{ ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ ++ msblk->read_inode = squashfs_read_inode; ++ msblk->read_blocklist = read_blocklist; ++ msblk->read_fragment_index_table = read_fragment_index_table; ++ ++ if (sblk->s_major == 1) { ++ if (!squashfs_1_0_supported(msblk)) { ++ SERROR("Major/Minor 
mismatch, Squashfs 1.0 filesystems " ++ "are unsupported\n"); ++ SERROR("Please recompile with " ++ "Squashfs 1.0 support enabled\n"); ++ return 0; ++ } ++ } else if (sblk->s_major == 2) { ++ if (!squashfs_2_0_supported(msblk)) { ++ SERROR("Major/Minor mismatch, Squashfs 2.0 filesystems " ++ "are unsupported\n"); ++ SERROR("Please recompile with " ++ "Squashfs 2.0 support enabled\n"); ++ return 0; ++ } ++ } else if(sblk->s_major != SQUASHFS_MAJOR || sblk->s_minor > ++ SQUASHFS_MINOR) { ++ SERROR("Major/Minor mismatch, trying to mount newer %d.%d " ++ "filesystem\n", sblk->s_major, sblk->s_minor); ++ SERROR("Please update your kernel\n"); ++ return 0; ++ } ++ ++ return 1; ++} ++ ++ ++static int squashfs_fill_super(struct super_block *s, void *data, int silent) ++{ ++ struct squashfs_sb_info *msblk; ++ struct squashfs_super_block *sblk; ++ int i; ++ char b[BDEVNAME_SIZE]; ++ struct inode *root; ++ ++ TRACE("Entered squashfs_read_superblock\n"); ++ ++ if (!(s->s_fs_info = kmalloc(sizeof(struct squashfs_sb_info), ++ GFP_KERNEL))) { ++ ERROR("Failed to allocate superblock\n"); ++ goto failure; ++ } ++ memset(s->s_fs_info, 0, sizeof(struct squashfs_sb_info)); ++ msblk = s->s_fs_info; ++ if (!(msblk->stream.workspace = vmalloc(zlib_inflate_workspacesize()))) { ++ ERROR("Failed to allocate zlib workspace\n"); ++ goto failure; ++ } ++ sblk = &msblk->sblk; ++ ++ msblk->devblksize = sb_min_blocksize(s, BLOCK_SIZE); ++ msblk->devblksize_log2 = ffz(~msblk->devblksize); ++ ++ mutex_init(&msblk->read_data_mutex); ++ mutex_init(&msblk->read_page_mutex); ++ mutex_init(&msblk->block_cache_mutex); ++ mutex_init(&msblk->fragment_mutex); ++ mutex_init(&msblk->meta_index_mutex); ++ ++ init_waitqueue_head(&msblk->waitq); ++ init_waitqueue_head(&msblk->fragment_wait_queue); ++ ++ sblk->bytes_used = sizeof(struct squashfs_super_block); ++ if (!squashfs_read_data(s, (char *) sblk, SQUASHFS_START, ++ sizeof(struct squashfs_super_block) | ++ SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, sizeof(struct squashfs_super_block))) { ++ SERROR("unable to read superblock\n"); ++ goto failed_mount; ++ } ++ ++ /* Check it is a SQUASHFS superblock */ ++ msblk->swap = 0; ++ if ((s->s_magic = sblk->s_magic) != SQUASHFS_MAGIC) { ++ if (sblk->s_magic == SQUASHFS_MAGIC_SWAP) { ++ struct squashfs_super_block ssblk; ++ ++ WARNING("Mounting a different endian SQUASHFS " ++ "filesystem on %s\n", bdevname(s->s_bdev, b)); ++ ++ SQUASHFS_SWAP_SUPER_BLOCK(&ssblk, sblk); ++ memcpy(sblk, &ssblk, sizeof(struct squashfs_super_block)); ++ msblk->swap = 1; ++ } else { ++ SERROR("Can't find a SQUASHFS superblock on %s\n", ++ bdevname(s->s_bdev, b)); ++ goto failed_mount; ++ } ++ } ++ ++ /* Check the MAJOR & MINOR versions */ ++ if(!supported_squashfs_filesystem(msblk, silent)) ++ goto failed_mount; ++ ++ /* Check the filesystem does not extend beyond the end of the ++ block device */ ++ if(sblk->bytes_used < 0 || sblk->bytes_used > i_size_read(s->s_bdev->bd_inode)) ++ goto failed_mount; ++ ++ /* Check the root inode for sanity */ ++ if (SQUASHFS_INODE_OFFSET(sblk->root_inode) > SQUASHFS_METADATA_SIZE) ++ goto failed_mount; ++ ++ TRACE("Found valid superblock on %s\n", bdevname(s->s_bdev, b)); ++ TRACE("Inodes are %scompressed\n", ++ SQUASHFS_UNCOMPRESSED_INODES ++ (sblk->flags) ? "un" : ""); ++ TRACE("Data is %scompressed\n", ++ SQUASHFS_UNCOMPRESSED_DATA(sblk->flags) ++ ? "un" : ""); ++ TRACE("Check data is %s present in the filesystem\n", ++ SQUASHFS_CHECK_DATA(sblk->flags) ? 
++ "" : "not"); ++ TRACE("Filesystem size %lld bytes\n", sblk->bytes_used); ++ TRACE("Block size %d\n", sblk->block_size); ++ TRACE("Number of inodes %d\n", sblk->inodes); ++ if (sblk->s_major > 1) ++ TRACE("Number of fragments %d\n", sblk->fragments); ++ TRACE("Number of uids %d\n", sblk->no_uids); ++ TRACE("Number of gids %d\n", sblk->no_guids); ++ TRACE("sblk->inode_table_start %llx\n", sblk->inode_table_start); ++ TRACE("sblk->directory_table_start %llx\n", sblk->directory_table_start); ++ if (sblk->s_major > 1) ++ TRACE("sblk->fragment_table_start %llx\n", ++ sblk->fragment_table_start); ++ TRACE("sblk->uid_start %llx\n", sblk->uid_start); ++ ++ s->s_flags |= MS_RDONLY; ++ s->s_op = &squashfs_super_ops; ++ ++ /* Init inode_table block pointer array */ ++ if (!(msblk->block_cache = kmalloc(sizeof(struct squashfs_cache) * ++ SQUASHFS_CACHED_BLKS, GFP_KERNEL))) { ++ ERROR("Failed to allocate block cache\n"); ++ goto failed_mount; ++ } ++ ++ for (i = 0; i < SQUASHFS_CACHED_BLKS; i++) ++ msblk->block_cache[i].block = SQUASHFS_INVALID_BLK; ++ ++ msblk->next_cache = 0; ++ ++ /* Allocate read_page block */ ++ if (!(msblk->read_page = kmalloc(sblk->block_size, GFP_KERNEL))) { ++ ERROR("Failed to allocate read_page block\n"); ++ goto failed_mount; ++ } ++ ++ /* Allocate uid and gid tables */ ++ if (!(msblk->uid = kmalloc((sblk->no_uids + sblk->no_guids) * ++ sizeof(unsigned int), GFP_KERNEL))) { ++ ERROR("Failed to allocate uid/gid table\n"); ++ goto failed_mount; ++ } ++ msblk->guid = msblk->uid + sblk->no_uids; ++ ++ if (msblk->swap) { ++ unsigned int suid[sblk->no_uids + sblk->no_guids]; ++ ++ if (!squashfs_read_data(s, (char *) &suid, sblk->uid_start, ++ ((sblk->no_uids + sblk->no_guids) * ++ sizeof(unsigned int)) | ++ SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, (sblk->no_uids + sblk->no_guids) * sizeof(unsigned int))) { ++ ERROR("unable to read uid/gid table\n"); ++ goto failed_mount; ++ } ++ ++ SQUASHFS_SWAP_DATA(msblk->uid, suid, (sblk->no_uids + ++ sblk->no_guids), (sizeof(unsigned int) * 8)); ++ } else ++ if (!squashfs_read_data(s, (char *) msblk->uid, sblk->uid_start, ++ ((sblk->no_uids + sblk->no_guids) * ++ sizeof(unsigned int)) | ++ SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, (sblk->no_uids + sblk->no_guids) * sizeof(unsigned int))) { ++ ERROR("unable to read uid/gid table\n"); ++ goto failed_mount; ++ } ++ ++ ++ if (sblk->s_major == 1 && squashfs_1_0_supported(msblk)) ++ goto allocate_root; ++ ++ if (!(msblk->fragment = kmalloc(sizeof(struct squashfs_fragment_cache) * ++ SQUASHFS_CACHED_FRAGMENTS, GFP_KERNEL))) { ++ ERROR("Failed to allocate fragment block cache\n"); ++ goto failed_mount; ++ } ++ ++ for (i = 0; i < SQUASHFS_CACHED_FRAGMENTS; i++) { ++ msblk->fragment[i].locked = 0; ++ msblk->fragment[i].block = SQUASHFS_INVALID_BLK; ++ msblk->fragment[i].data = NULL; ++ } ++ ++ msblk->next_fragment = 0; ++ ++ /* Allocate and read fragment index table */ ++ if (msblk->read_fragment_index_table(s) == 0) ++ goto failed_mount; ++ ++ if(sblk->s_major < 3 || sblk->lookup_table_start == SQUASHFS_INVALID_BLK) ++ goto allocate_root; ++ ++ /* Allocate and read inode lookup table */ ++ if (read_inode_lookup_table(s) == 0) ++ goto failed_mount; ++ ++ s->s_op = &squashfs_export_super_ops; ++ s->s_export_op = &squashfs_export_ops; ++ ++allocate_root: ++ root = new_inode(s); ++ if ((msblk->read_inode)(root, sblk->root_inode) == 0) ++ goto failed_mount; ++ insert_inode_hash(root); ++ ++ if ((s->s_root = d_alloc_root(root)) == NULL) { ++ ERROR("Root inode create failed\n"); ++ iput(root); ++ goto failed_mount; 
++ } ++ ++ TRACE("Leaving squashfs_read_super\n"); ++ return 0; ++ ++failed_mount: ++ kfree(msblk->inode_lookup_table); ++ kfree(msblk->fragment_index); ++ kfree(msblk->fragment); ++ kfree(msblk->uid); ++ kfree(msblk->read_page); ++ kfree(msblk->block_cache); ++ kfree(msblk->fragment_index_2); ++ vfree(msblk->stream.workspace); ++ kfree(s->s_fs_info); ++ s->s_fs_info = NULL; ++ return -EINVAL; ++ ++failure: ++ return -ENOMEM; ++} ++ ++ ++static int squashfs_statfs(struct dentry *dentry, struct kstatfs *buf) ++{ ++ struct squashfs_sb_info *msblk = dentry->d_sb->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ ++ TRACE("Entered squashfs_statfs\n"); ++ ++ buf->f_type = SQUASHFS_MAGIC; ++ buf->f_bsize = sblk->block_size; ++ buf->f_blocks = ((sblk->bytes_used - 1) >> sblk->block_log) + 1; ++ buf->f_bfree = buf->f_bavail = 0; ++ buf->f_files = sblk->inodes; ++ buf->f_ffree = 0; ++ buf->f_namelen = SQUASHFS_NAME_LEN; ++ ++ return 0; ++} ++ ++ ++static int squashfs_symlink_readpage(struct file *file, struct page *page) ++{ ++ struct inode *inode = page->mapping->host; ++ int index = page->index << PAGE_CACHE_SHIFT, length, bytes; ++ long long block = SQUASHFS_I(inode)->start_block; ++ int offset = SQUASHFS_I(inode)->offset; ++ void *pageaddr = kmap(page); ++ ++ TRACE("Entered squashfs_symlink_readpage, page index %ld, start block " ++ "%llx, offset %x\n", page->index, ++ SQUASHFS_I(inode)->start_block, ++ SQUASHFS_I(inode)->offset); ++ ++ for (length = 0; length < index; length += bytes) { ++ if (!(bytes = squashfs_get_cached_block(inode->i_sb, NULL, ++ block, offset, PAGE_CACHE_SIZE, &block, ++ &offset))) { ++ ERROR("Unable to read symbolic link [%llx:%x]\n", block, ++ offset); ++ goto skip_read; ++ } ++ } ++ ++ if (length != index) { ++ ERROR("(squashfs_symlink_readpage) length != index\n"); ++ bytes = 0; ++ goto skip_read; ++ } ++ ++ bytes = (i_size_read(inode) - length) > PAGE_CACHE_SIZE ? 
PAGE_CACHE_SIZE : ++ i_size_read(inode) - length; ++ ++ if (!(bytes = squashfs_get_cached_block(inode->i_sb, pageaddr, block, ++ offset, bytes, &block, &offset))) ++ ERROR("Unable to read symbolic link [%llx:%x]\n", block, offset); ++ ++skip_read: ++ memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); ++ kunmap(page); ++ flush_dcache_page(page); ++ SetPageUptodate(page); ++ unlock_page(page); ++ ++ return 0; ++} ++ ++ ++struct meta_index *locate_meta_index(struct inode *inode, int index, int offset) ++{ ++ struct meta_index *meta = NULL; ++ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; ++ int i; ++ ++ mutex_lock(&msblk->meta_index_mutex); ++ ++ TRACE("locate_meta_index: index %d, offset %d\n", index, offset); ++ ++ if(msblk->meta_index == NULL) ++ goto not_allocated; ++ ++ for (i = 0; i < SQUASHFS_META_NUMBER; i ++) ++ if (msblk->meta_index[i].inode_number == inode->i_ino && ++ msblk->meta_index[i].offset >= offset && ++ msblk->meta_index[i].offset <= index && ++ msblk->meta_index[i].locked == 0) { ++ TRACE("locate_meta_index: entry %d, offset %d\n", i, ++ msblk->meta_index[i].offset); ++ meta = &msblk->meta_index[i]; ++ offset = meta->offset; ++ } ++ ++ if (meta) ++ meta->locked = 1; ++ ++not_allocated: ++ mutex_unlock(&msblk->meta_index_mutex); ++ ++ return meta; ++} ++ ++ ++struct meta_index *empty_meta_index(struct inode *inode, int offset, int skip) ++{ ++ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; ++ struct meta_index *meta = NULL; ++ int i; ++ ++ mutex_lock(&msblk->meta_index_mutex); ++ ++ TRACE("empty_meta_index: offset %d, skip %d\n", offset, skip); ++ ++ if(msblk->meta_index == NULL) { ++ if (!(msblk->meta_index = kmalloc(sizeof(struct meta_index) * ++ SQUASHFS_META_NUMBER, GFP_KERNEL))) { ++ ERROR("Failed to allocate meta_index\n"); ++ goto failed; ++ } ++ for(i = 0; i < SQUASHFS_META_NUMBER; i++) { ++ msblk->meta_index[i].inode_number = 0; ++ msblk->meta_index[i].locked = 0; ++ } ++ msblk->next_meta_index = 0; ++ } ++ ++ for(i = SQUASHFS_META_NUMBER; i && ++ msblk->meta_index[msblk->next_meta_index].locked; i --) ++ msblk->next_meta_index = (msblk->next_meta_index + 1) % ++ SQUASHFS_META_NUMBER; ++ ++ if(i == 0) { ++ TRACE("empty_meta_index: failed!\n"); ++ goto failed; ++ } ++ ++ TRACE("empty_meta_index: returned meta entry %d, %p\n", ++ msblk->next_meta_index, ++ &msblk->meta_index[msblk->next_meta_index]); ++ ++ meta = &msblk->meta_index[msblk->next_meta_index]; ++ msblk->next_meta_index = (msblk->next_meta_index + 1) % ++ SQUASHFS_META_NUMBER; ++ ++ meta->inode_number = inode->i_ino; ++ meta->offset = offset; ++ meta->skip = skip; ++ meta->entries = 0; ++ meta->locked = 1; ++ ++failed: ++ mutex_unlock(&msblk->meta_index_mutex); ++ return meta; ++} ++ ++ ++void release_meta_index(struct inode *inode, struct meta_index *meta) ++{ ++ meta->locked = 0; ++ smp_mb(); ++} ++ ++ ++static int read_block_index(struct super_block *s, int blocks, char *block_list, ++ long long *start_block, int *offset) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ unsigned int *block_listp; ++ int block = 0; ++ ++ if (msblk->swap) { ++ char sblock_list[blocks << 2]; ++ ++ if (!squashfs_get_cached_block(s, sblock_list, *start_block, ++ *offset, blocks << 2, start_block, offset)) { ++ ERROR("Unable to read block list [%llx:%x]\n", ++ *start_block, *offset); ++ goto failure; ++ } ++ SQUASHFS_SWAP_INTS(((unsigned int *)block_list), ++ ((unsigned int *)sblock_list), blocks); ++ } else ++ if (!squashfs_get_cached_block(s, block_list, *start_block, ++ *offset, blocks << 
2, start_block, offset)) { ++ ERROR("Unable to read block list [%llx:%x]\n", ++ *start_block, *offset); ++ goto failure; ++ } ++ ++ for (block_listp = (unsigned int *) block_list; blocks; ++ block_listp++, blocks --) ++ block += SQUASHFS_COMPRESSED_SIZE_BLOCK(*block_listp); ++ ++ return block; ++ ++failure: ++ return -1; ++} ++ ++ ++#define SIZE 256 ++ ++static inline int calculate_skip(int blocks) { ++ int skip = (blocks - 1) / ((SQUASHFS_SLOTS * SQUASHFS_META_ENTRIES + 1) * SQUASHFS_META_INDEXES); ++ return skip >= 7 ? 7 : skip + 1; ++} ++ ++ ++static int get_meta_index(struct inode *inode, int index, ++ long long *index_block, int *index_offset, ++ long long *data_block, char *block_list) ++{ ++ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ int skip = calculate_skip(i_size_read(inode) >> sblk->block_log); ++ int offset = 0; ++ struct meta_index *meta; ++ struct meta_entry *meta_entry; ++ long long cur_index_block = SQUASHFS_I(inode)->u.s1.block_list_start; ++ int cur_offset = SQUASHFS_I(inode)->offset; ++ long long cur_data_block = SQUASHFS_I(inode)->start_block; ++ int i; ++ ++ index /= SQUASHFS_META_INDEXES * skip; ++ ++ while ( offset < index ) { ++ meta = locate_meta_index(inode, index, offset + 1); ++ ++ if (meta == NULL) { ++ if ((meta = empty_meta_index(inode, offset + 1, ++ skip)) == NULL) ++ goto all_done; ++ } else { ++ if(meta->entries == 0) ++ goto failed; ++ offset = index < meta->offset + meta->entries ? index : ++ meta->offset + meta->entries - 1; ++ meta_entry = &meta->meta_entry[offset - meta->offset]; ++ cur_index_block = meta_entry->index_block + sblk->inode_table_start; ++ cur_offset = meta_entry->offset; ++ cur_data_block = meta_entry->data_block; ++ TRACE("get_meta_index: offset %d, meta->offset %d, " ++ "meta->entries %d\n", offset, meta->offset, ++ meta->entries); ++ TRACE("get_meta_index: index_block 0x%llx, offset 0x%x" ++ " data_block 0x%llx\n", cur_index_block, ++ cur_offset, cur_data_block); ++ } ++ ++ for (i = meta->offset + meta->entries; i <= index && ++ i < meta->offset + SQUASHFS_META_ENTRIES; i++) { ++ int blocks = skip * SQUASHFS_META_INDEXES; ++ ++ while (blocks) { ++ int block = blocks > (SIZE >> 2) ? 
(SIZE >> 2) : ++ blocks; ++ int res = read_block_index(inode->i_sb, block, ++ block_list, &cur_index_block, ++ &cur_offset); ++ ++ if (res == -1) ++ goto failed; ++ ++ cur_data_block += res; ++ blocks -= block; ++ } ++ ++ meta_entry = &meta->meta_entry[i - meta->offset]; ++ meta_entry->index_block = cur_index_block - sblk->inode_table_start; ++ meta_entry->offset = cur_offset; ++ meta_entry->data_block = cur_data_block; ++ meta->entries ++; ++ offset ++; ++ } ++ ++ TRACE("get_meta_index: meta->offset %d, meta->entries %d\n", ++ meta->offset, meta->entries); ++ ++ release_meta_index(inode, meta); ++ } ++ ++all_done: ++ *index_block = cur_index_block; ++ *index_offset = cur_offset; ++ *data_block = cur_data_block; ++ ++ return offset * SQUASHFS_META_INDEXES * skip; ++ ++failed: ++ release_meta_index(inode, meta); ++ return -1; ++} ++ ++ ++static long long read_blocklist(struct inode *inode, int index, ++ int readahead_blks, char *block_list, ++ unsigned short **block_p, unsigned int *bsize) ++{ ++ long long block_ptr; ++ int offset; ++ long long block; ++ int res = get_meta_index(inode, index, &block_ptr, &offset, &block, ++ block_list); ++ ++ TRACE("read_blocklist: res %d, index %d, block_ptr 0x%llx, offset" ++ " 0x%x, block 0x%llx\n", res, index, block_ptr, offset, ++ block); ++ ++ if(res == -1) ++ goto failure; ++ ++ index -= res; ++ ++ while ( index ) { ++ int blocks = index > (SIZE >> 2) ? (SIZE >> 2) : index; ++ int res = read_block_index(inode->i_sb, blocks, block_list, ++ &block_ptr, &offset); ++ if (res == -1) ++ goto failure; ++ block += res; ++ index -= blocks; ++ } ++ ++ if (read_block_index(inode->i_sb, 1, block_list, ++ &block_ptr, &offset) == -1) ++ goto failure; ++ *bsize = *((unsigned int *) block_list); ++ ++ return block; ++ ++failure: ++ return 0; ++} ++ ++ ++static int squashfs_readpage(struct file *file, struct page *page) ++{ ++ struct inode *inode = page->mapping->host; ++ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ unsigned char *block_list; ++ long long block; ++ unsigned int bsize, i = 0, bytes = 0, byte_offset = 0; ++ int index = page->index >> (sblk->block_log - PAGE_CACHE_SHIFT); ++ void *pageaddr; ++ struct squashfs_fragment_cache *fragment = NULL; ++ char *data_ptr = msblk->read_page; ++ ++ int mask = (1 << (sblk->block_log - PAGE_CACHE_SHIFT)) - 1; ++ int start_index = page->index & ~mask; ++ int end_index = start_index | mask; ++ ++ TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", ++ page->index, ++ SQUASHFS_I(inode)->start_block); ++ ++ if (!(block_list = kmalloc(SIZE, GFP_KERNEL))) { ++ ERROR("Failed to allocate block_list\n"); ++ goto skip_read; ++ } ++ ++ if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> ++ PAGE_CACHE_SHIFT)) ++ goto skip_read; ++ ++ if (SQUASHFS_I(inode)->u.s1.fragment_start_block == SQUASHFS_INVALID_BLK ++ || index < (i_size_read(inode) >> ++ sblk->block_log)) { ++ if ((block = (msblk->read_blocklist)(inode, index, 1, ++ block_list, NULL, &bsize)) == 0) ++ goto skip_read; ++ ++ mutex_lock(&msblk->read_page_mutex); ++ ++ if (!(bytes = squashfs_read_data(inode->i_sb, msblk->read_page, ++ block, bsize, NULL, sblk->block_size))) { ++ ERROR("Unable to read page, block %llx, size %x\n", block, ++ bsize); ++ mutex_unlock(&msblk->read_page_mutex); ++ goto skip_read; ++ } ++ } else { ++ if ((fragment = get_cached_fragment(inode->i_sb, ++ SQUASHFS_I(inode)-> ++ u.s1.fragment_start_block, ++ SQUASHFS_I(inode)->u.s1.fragment_size)) ++ == 
NULL) { ++ ERROR("Unable to read page, block %llx, size %x\n", ++ SQUASHFS_I(inode)-> ++ u.s1.fragment_start_block, ++ (int) SQUASHFS_I(inode)-> ++ u.s1.fragment_size); ++ goto skip_read; ++ } ++ bytes = SQUASHFS_I(inode)->u.s1.fragment_offset + ++ (i_size_read(inode) & (sblk->block_size ++ - 1)); ++ byte_offset = SQUASHFS_I(inode)->u.s1.fragment_offset; ++ data_ptr = fragment->data; ++ } ++ ++ for (i = start_index; i <= end_index && byte_offset < bytes; ++ i++, byte_offset += PAGE_CACHE_SIZE) { ++ struct page *push_page; ++ int avail = (bytes - byte_offset) > PAGE_CACHE_SIZE ? ++ PAGE_CACHE_SIZE : bytes - byte_offset; ++ ++ TRACE("bytes %d, i %d, byte_offset %d, available_bytes %d\n", ++ bytes, i, byte_offset, avail); ++ ++ push_page = (i == page->index) ? page : ++ grab_cache_page_nowait(page->mapping, i); ++ ++ if (!push_page) ++ continue; ++ ++ if (PageUptodate(push_page)) ++ goto skip_page; ++ ++ pageaddr = kmap_atomic(push_page, KM_USER0); ++ memcpy(pageaddr, data_ptr + byte_offset, avail); ++ memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail); ++ kunmap_atomic(pageaddr, KM_USER0); ++ flush_dcache_page(push_page); ++ SetPageUptodate(push_page); ++skip_page: ++ unlock_page(push_page); ++ if(i != page->index) ++ page_cache_release(push_page); ++ } ++ ++ if (SQUASHFS_I(inode)->u.s1.fragment_start_block == SQUASHFS_INVALID_BLK ++ || index < (i_size_read(inode) >> ++ sblk->block_log)) ++ mutex_unlock(&msblk->read_page_mutex); ++ else ++ release_cached_fragment(msblk, fragment); ++ ++ kfree(block_list); ++ return 0; ++ ++skip_read: ++ pageaddr = kmap_atomic(page, KM_USER0); ++ memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); ++ kunmap_atomic(pageaddr, KM_USER0); ++ flush_dcache_page(page); ++ SetPageUptodate(page); ++ unlock_page(page); ++ ++ kfree(block_list); ++ return 0; ++} ++ ++ ++static int squashfs_readpage4K(struct file *file, struct page *page) ++{ ++ struct inode *inode = page->mapping->host; ++ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ unsigned char *block_list; ++ long long block; ++ unsigned int bsize, bytes = 0; ++ void *pageaddr; ++ ++ TRACE("Entered squashfs_readpage4K, page index %lx, start block %llx\n", ++ page->index, ++ SQUASHFS_I(inode)->start_block); ++ ++ if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> ++ PAGE_CACHE_SHIFT)) { ++ block_list = NULL; ++ goto skip_read; ++ } ++ ++ if (!(block_list = kmalloc(SIZE, GFP_KERNEL))) { ++ ERROR("Failed to allocate block_list\n"); ++ goto skip_read; ++ } ++ ++ if (SQUASHFS_I(inode)->u.s1.fragment_start_block == SQUASHFS_INVALID_BLK ++ || page->index < (i_size_read(inode) >> ++ sblk->block_log)) { ++ block = (msblk->read_blocklist)(inode, page->index, 1, ++ block_list, NULL, &bsize); ++ if(block == 0) ++ goto skip_read; ++ ++ mutex_lock(&msblk->read_page_mutex); ++ bytes = squashfs_read_data(inode->i_sb, msblk->read_page, block, ++ bsize, NULL, sblk->block_size); ++ if (bytes) { ++ pageaddr = kmap_atomic(page, KM_USER0); ++ memcpy(pageaddr, msblk->read_page, bytes); ++ kunmap_atomic(pageaddr, KM_USER0); ++ } else ++ ERROR("Unable to read page, block %llx, size %x\n", ++ block, bsize); ++ mutex_unlock(&msblk->read_page_mutex); ++ } else { ++ struct squashfs_fragment_cache *fragment = ++ get_cached_fragment(inode->i_sb, ++ SQUASHFS_I(inode)-> ++ u.s1.fragment_start_block, ++ SQUASHFS_I(inode)-> u.s1.fragment_size); ++ if (fragment) { ++ bytes = i_size_read(inode) & (sblk->block_size - 1); ++ pageaddr = kmap_atomic(page, KM_USER0); ++ 
memcpy(pageaddr, fragment->data + SQUASHFS_I(inode)-> ++ u.s1.fragment_offset, bytes); ++ kunmap_atomic(pageaddr, KM_USER0); ++ release_cached_fragment(msblk, fragment); ++ } else ++ ERROR("Unable to read page, block %llx, size %x\n", ++ SQUASHFS_I(inode)-> ++ u.s1.fragment_start_block, (int) ++ SQUASHFS_I(inode)-> u.s1.fragment_size); ++ } ++ ++skip_read: ++ pageaddr = kmap_atomic(page, KM_USER0); ++ memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); ++ kunmap_atomic(pageaddr, KM_USER0); ++ flush_dcache_page(page); ++ SetPageUptodate(page); ++ unlock_page(page); ++ ++ kfree(block_list); ++ return 0; ++} ++ ++ ++static int get_dir_index_using_offset(struct super_block *s, long long ++ *next_block, unsigned int *next_offset, ++ long long index_start, ++ unsigned int index_offset, int i_count, ++ long long f_pos) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ int i, length = 0; ++ struct squashfs_dir_index index; ++ ++ TRACE("Entered get_dir_index_using_offset, i_count %d, f_pos %d\n", ++ i_count, (unsigned int) f_pos); ++ ++ f_pos =- 3; ++ if (f_pos == 0) ++ goto finish; ++ ++ for (i = 0; i < i_count; i++) { ++ if (msblk->swap) { ++ struct squashfs_dir_index sindex; ++ squashfs_get_cached_block(s, (char *) &sindex, ++ index_start, index_offset, ++ sizeof(sindex), &index_start, ++ &index_offset); ++ SQUASHFS_SWAP_DIR_INDEX(&index, &sindex); ++ } else ++ squashfs_get_cached_block(s, (char *) &index, ++ index_start, index_offset, ++ sizeof(index), &index_start, ++ &index_offset); ++ ++ if (index.index > f_pos) ++ break; ++ ++ squashfs_get_cached_block(s, NULL, index_start, index_offset, ++ index.size + 1, &index_start, ++ &index_offset); ++ ++ length = index.index; ++ *next_block = index.start_block + sblk->directory_table_start; ++ } ++ ++ *next_offset = (length + *next_offset) % SQUASHFS_METADATA_SIZE; ++ ++finish: ++ return length + 3; ++} ++ ++ ++static int get_dir_index_using_name(struct super_block *s, long long ++ *next_block, unsigned int *next_offset, ++ long long index_start, ++ unsigned int index_offset, int i_count, ++ const char *name, int size) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ int i, length = 0; ++ struct squashfs_dir_index *index; ++ char *str; ++ ++ TRACE("Entered get_dir_index_using_name, i_count %d\n", i_count); ++ ++ if (!(str = kmalloc(sizeof(struct squashfs_dir_index) + ++ (SQUASHFS_NAME_LEN + 1) * 2, GFP_KERNEL))) { ++ ERROR("Failed to allocate squashfs_dir_index\n"); ++ goto failure; ++ } ++ ++ index = (struct squashfs_dir_index *) (str + SQUASHFS_NAME_LEN + 1); ++ strncpy(str, name, size); ++ str[size] = '\0'; ++ ++ for (i = 0; i < i_count; i++) { ++ if (msblk->swap) { ++ struct squashfs_dir_index sindex; ++ squashfs_get_cached_block(s, (char *) &sindex, ++ index_start, index_offset, ++ sizeof(sindex), &index_start, ++ &index_offset); ++ SQUASHFS_SWAP_DIR_INDEX(index, &sindex); ++ } else ++ squashfs_get_cached_block(s, (char *) index, ++ index_start, index_offset, ++ sizeof(struct squashfs_dir_index), ++ &index_start, &index_offset); ++ ++ squashfs_get_cached_block(s, index->name, index_start, ++ index_offset, index->size + 1, ++ &index_start, &index_offset); ++ ++ index->name[index->size + 1] = '\0'; ++ ++ if (strcmp(index->name, str) > 0) ++ break; ++ ++ length = index->index; ++ *next_block = index->start_block + sblk->directory_table_start; ++ } ++ ++ *next_offset = (length + *next_offset) % SQUASHFS_METADATA_SIZE; ++ 
kfree(str); ++failure: ++ return length + 3; ++} ++ ++ ++static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) ++{ ++ struct inode *i = file->f_dentry->d_inode; ++ struct squashfs_sb_info *msblk = i->i_sb->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ long long next_block = SQUASHFS_I(i)->start_block + ++ sblk->directory_table_start; ++ int next_offset = SQUASHFS_I(i)->offset, length = 0, ++ dir_count; ++ struct squashfs_dir_header dirh; ++ struct squashfs_dir_entry *dire; ++ ++ TRACE("Entered squashfs_readdir [%llx:%x]\n", next_block, next_offset); ++ ++ if (!(dire = kmalloc(sizeof(struct squashfs_dir_entry) + ++ SQUASHFS_NAME_LEN + 1, GFP_KERNEL))) { ++ ERROR("Failed to allocate squashfs_dir_entry\n"); ++ goto finish; ++ } ++ ++ while(file->f_pos < 3) { ++ char *name; ++ int size, i_ino; ++ ++ if(file->f_pos == 0) { ++ name = "."; ++ size = 1; ++ i_ino = i->i_ino; ++ } else { ++ name = ".."; ++ size = 2; ++ i_ino = SQUASHFS_I(i)->u.s2.parent_inode; ++ } ++ TRACE("Calling filldir(%x, %s, %d, %d, %d, %d)\n", ++ (unsigned int) dirent, name, size, (int) ++ file->f_pos, i_ino, ++ squashfs_filetype_table[1]); ++ ++ if (filldir(dirent, name, size, ++ file->f_pos, i_ino, ++ squashfs_filetype_table[1]) < 0) { ++ TRACE("Filldir returned less than 0\n"); ++ goto finish; ++ } ++ file->f_pos += size; ++ } ++ ++ length = get_dir_index_using_offset(i->i_sb, &next_block, &next_offset, ++ SQUASHFS_I(i)->u.s2.directory_index_start, ++ SQUASHFS_I(i)->u.s2.directory_index_offset, ++ SQUASHFS_I(i)->u.s2.directory_index_count, ++ file->f_pos); ++ ++ while (length < i_size_read(i)) { ++ /* read directory header */ ++ if (msblk->swap) { ++ struct squashfs_dir_header sdirh; ++ ++ if (!squashfs_get_cached_block(i->i_sb, (char *) &sdirh, ++ next_block, next_offset, sizeof(sdirh), ++ &next_block, &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(sdirh); ++ SQUASHFS_SWAP_DIR_HEADER(&dirh, &sdirh); ++ } else { ++ if (!squashfs_get_cached_block(i->i_sb, (char *) &dirh, ++ next_block, next_offset, sizeof(dirh), ++ &next_block, &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(dirh); ++ } ++ ++ dir_count = dirh.count + 1; ++ while (dir_count--) { ++ if (msblk->swap) { ++ struct squashfs_dir_entry sdire; ++ if (!squashfs_get_cached_block(i->i_sb, (char *) ++ &sdire, next_block, next_offset, ++ sizeof(sdire), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(sdire); ++ SQUASHFS_SWAP_DIR_ENTRY(dire, &sdire); ++ } else { ++ if (!squashfs_get_cached_block(i->i_sb, (char *) ++ dire, next_block, next_offset, ++ sizeof(*dire), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(*dire); ++ } ++ ++ if (!squashfs_get_cached_block(i->i_sb, dire->name, ++ next_block, next_offset, ++ dire->size + 1, &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ length += dire->size + 1; ++ ++ if (file->f_pos >= length) ++ continue; ++ ++ dire->name[dire->size + 1] = '\0'; ++ ++ TRACE("Calling filldir(%x, %s, %d, %d, %x:%x, %d, %d)\n", ++ (unsigned int) dirent, dire->name, ++ dire->size + 1, (int) file->f_pos, ++ dirh.start_block, dire->offset, ++ dirh.inode_number + dire->inode_number, ++ squashfs_filetype_table[dire->type]); ++ ++ if (filldir(dirent, dire->name, dire->size + 1, ++ file->f_pos, ++ dirh.inode_number + dire->inode_number, ++ squashfs_filetype_table[dire->type]) ++ < 0) { ++ TRACE("Filldir returned less than 0\n"); ++ goto finish; ++ } ++ file->f_pos = length; ++ } ++ } ++ ++finish: ++ kfree(dire); ++ return 0; ++ 
++failed_read: ++ ERROR("Unable to read directory block [%llx:%x]\n", next_block, ++ next_offset); ++ kfree(dire); ++ return 0; ++} ++ ++ ++static struct dentry *squashfs_lookup(struct inode *i, struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ const unsigned char *name = dentry->d_name.name; ++ int len = dentry->d_name.len; ++ struct inode *inode = NULL; ++ struct squashfs_sb_info *msblk = i->i_sb->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ long long next_block = SQUASHFS_I(i)->start_block + ++ sblk->directory_table_start; ++ int next_offset = SQUASHFS_I(i)->offset, length = 0, ++ dir_count; ++ struct squashfs_dir_header dirh; ++ struct squashfs_dir_entry *dire; ++ ++ TRACE("Entered squashfs_lookup [%llx:%x]\n", next_block, next_offset); ++ ++ if (!(dire = kmalloc(sizeof(struct squashfs_dir_entry) + ++ SQUASHFS_NAME_LEN + 1, GFP_KERNEL))) { ++ ERROR("Failed to allocate squashfs_dir_entry\n"); ++ goto exit_lookup; ++ } ++ ++ if (len > SQUASHFS_NAME_LEN) ++ goto exit_lookup; ++ ++ length = get_dir_index_using_name(i->i_sb, &next_block, &next_offset, ++ SQUASHFS_I(i)->u.s2.directory_index_start, ++ SQUASHFS_I(i)->u.s2.directory_index_offset, ++ SQUASHFS_I(i)->u.s2.directory_index_count, name, ++ len); ++ ++ while (length < i_size_read(i)) { ++ /* read directory header */ ++ if (msblk->swap) { ++ struct squashfs_dir_header sdirh; ++ if (!squashfs_get_cached_block(i->i_sb, (char *) &sdirh, ++ next_block, next_offset, sizeof(sdirh), ++ &next_block, &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(sdirh); ++ SQUASHFS_SWAP_DIR_HEADER(&dirh, &sdirh); ++ } else { ++ if (!squashfs_get_cached_block(i->i_sb, (char *) &dirh, ++ next_block, next_offset, sizeof(dirh), ++ &next_block, &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(dirh); ++ } ++ ++ dir_count = dirh.count + 1; ++ while (dir_count--) { ++ if (msblk->swap) { ++ struct squashfs_dir_entry sdire; ++ if (!squashfs_get_cached_block(i->i_sb, (char *) ++ &sdire, next_block,next_offset, ++ sizeof(sdire), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(sdire); ++ SQUASHFS_SWAP_DIR_ENTRY(dire, &sdire); ++ } else { ++ if (!squashfs_get_cached_block(i->i_sb, (char *) ++ dire, next_block,next_offset, ++ sizeof(*dire), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(*dire); ++ } ++ ++ if (!squashfs_get_cached_block(i->i_sb, dire->name, ++ next_block, next_offset, dire->size + 1, ++ &next_block, &next_offset)) ++ goto failed_read; ++ ++ length += dire->size + 1; ++ ++ if (name[0] < dire->name[0]) ++ goto exit_lookup; ++ ++ if ((len == dire->size + 1) && !strncmp(name, dire->name, len)) { ++ squashfs_inode_t ino = SQUASHFS_MKINODE(dirh.start_block, ++ dire->offset); ++ ++ TRACE("calling squashfs_iget for directory " ++ "entry %s, inode %x:%x, %d\n", name, ++ dirh.start_block, dire->offset, ++ dirh.inode_number + dire->inode_number); ++ ++ inode = squashfs_iget(i->i_sb, ino, dirh.inode_number + dire->inode_number); ++ ++ goto exit_lookup; ++ } ++ } ++ } ++ ++exit_lookup: ++ kfree(dire); ++ if (inode) ++ return d_splice_alias(inode, dentry); ++ d_add(dentry, inode); ++ return ERR_PTR(0); ++ ++failed_read: ++ ERROR("Unable to read directory block [%llx:%x]\n", next_block, ++ next_offset); ++ goto exit_lookup; ++} ++ ++ ++static int squashfs_remount(struct super_block *s, int *flags, char *data) ++{ ++ *flags |= MS_RDONLY; ++ return 0; ++} ++ ++ ++static void squashfs_put_super(struct super_block *s) ++{ ++ int i; ++ ++ if (s->s_fs_info) { ++ struct 
squashfs_sb_info *sbi = s->s_fs_info; ++ if (sbi->block_cache) ++ for (i = 0; i < SQUASHFS_CACHED_BLKS; i++) ++ if (sbi->block_cache[i].block != ++ SQUASHFS_INVALID_BLK) ++ kfree(sbi->block_cache[i].data); ++ if (sbi->fragment) ++ for (i = 0; i < SQUASHFS_CACHED_FRAGMENTS; i++) ++ SQUASHFS_FREE(sbi->fragment[i].data); ++ kfree(sbi->fragment); ++ kfree(sbi->block_cache); ++ kfree(sbi->read_page); ++ kfree(sbi->uid); ++ kfree(sbi->fragment_index); ++ kfree(sbi->fragment_index_2); ++ kfree(sbi->meta_index); ++ vfree(sbi->stream.workspace); ++ kfree(s->s_fs_info); ++ s->s_fs_info = NULL; ++ } ++} ++ ++ ++static int squashfs_get_sb(struct file_system_type *fs_type, int flags, ++ const char *dev_name, void *data, ++ struct vfsmount *mnt) ++{ ++ return get_sb_bdev(fs_type, flags, dev_name, data, squashfs_fill_super, ++ mnt); ++} ++ ++ ++static int __init init_squashfs_fs(void) ++{ ++ int err = init_inodecache(); ++ if (err) ++ goto out; ++ ++ printk(KERN_INFO "squashfs: version 3.2-r2 (2007/01/15) " ++ "Phillip Lougher\n"); ++ ++ if ((err = register_filesystem(&squashfs_fs_type))) ++ destroy_inodecache(); ++ ++out: ++ return err; ++} ++ ++ ++static void __exit exit_squashfs_fs(void) ++{ ++ unregister_filesystem(&squashfs_fs_type); ++ destroy_inodecache(); ++} ++ ++ ++static struct kmem_cache * squashfs_inode_cachep; ++ ++ ++static struct inode *squashfs_alloc_inode(struct super_block *sb) ++{ ++ struct squashfs_inode_info *ei; ++ ei = kmem_cache_alloc(squashfs_inode_cachep, GFP_KERNEL); ++ if (!ei) ++ return NULL; ++ return &ei->vfs_inode; ++} ++ ++ ++static void squashfs_destroy_inode(struct inode *inode) ++{ ++ kmem_cache_free(squashfs_inode_cachep, SQUASHFS_I(inode)); ++} ++ ++ ++static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) ++{ ++ struct squashfs_inode_info *ei = foo; ++ ++ if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == ++ SLAB_CTOR_CONSTRUCTOR) ++ inode_init_once(&ei->vfs_inode); ++} ++ ++ ++static int __init init_inodecache(void) ++{ ++ squashfs_inode_cachep = kmem_cache_create("squashfs_inode_cache", ++ sizeof(struct squashfs_inode_info), ++ 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, ++ init_once, NULL); ++ if (squashfs_inode_cachep == NULL) ++ return -ENOMEM; ++ return 0; ++} ++ ++ ++static void destroy_inodecache(void) ++{ ++ kmem_cache_destroy(squashfs_inode_cachep); ++} ++ ++ ++module_init(init_squashfs_fs); ++module_exit(exit_squashfs_fs); ++MODULE_DESCRIPTION("squashfs 3.2-r2, a compressed read-only filesystem"); ++MODULE_AUTHOR("Phillip Lougher <phillip@lougher.org.uk>"); ++MODULE_LICENSE("GPL"); +diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h +new file mode 100644 +index 0000000..6f863f0 +--- /dev/null ++++ b/fs/squashfs/squashfs.h +@@ -0,0 +1,87 @@ ++/* ++ * Squashfs - a compressed read only filesystem for Linux ++ * ++ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007 ++ * Phillip Lougher <phillip@lougher.org.uk> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2, ++ * or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * squashfs.h ++ */ ++ ++#ifdef CONFIG_SQUASHFS_1_0_COMPATIBILITY ++#undef CONFIG_SQUASHFS_1_0_COMPATIBILITY ++#endif ++ ++#ifdef SQUASHFS_TRACE ++#define TRACE(s, args...) printk(KERN_NOTICE "SQUASHFS: "s, ## args) ++#else ++#define TRACE(s, args...) {} ++#endif ++ ++#define ERROR(s, args...) printk(KERN_ERR "SQUASHFS error: "s, ## args) ++ ++#define SERROR(s, args...) do { \ ++ if (!silent) \ ++ printk(KERN_ERR "SQUASHFS error: "s, ## args);\ ++ } while(0) ++ ++#define WARNING(s, args...) printk(KERN_WARNING "SQUASHFS: "s, ## args) ++ ++static inline struct squashfs_inode_info *SQUASHFS_I(struct inode *inode) ++{ ++ return list_entry(inode, struct squashfs_inode_info, vfs_inode); ++} ++ ++#if defined(CONFIG_SQUASHFS_1_0_COMPATIBILITY ) || defined(CONFIG_SQUASHFS_2_0_COMPATIBILITY) ++#define SQSH_EXTERN ++extern unsigned int squashfs_read_data(struct super_block *s, char *buffer, ++ long long index, unsigned int length, ++ long long *next_index, int srclength); ++extern int squashfs_get_cached_block(struct super_block *s, char *buffer, ++ long long block, unsigned int offset, ++ int length, long long *next_block, ++ unsigned int *next_offset); ++extern void release_cached_fragment(struct squashfs_sb_info *msblk, struct ++ squashfs_fragment_cache *fragment); ++extern struct squashfs_fragment_cache *get_cached_fragment(struct super_block ++ *s, long long start_block, ++ int length); ++extern struct inode *squashfs_iget(struct super_block *s, squashfs_inode_t inode, unsigned int inode_number); ++extern const struct address_space_operations squashfs_symlink_aops; ++extern const struct address_space_operations squashfs_aops; ++extern const struct address_space_operations squashfs_aops_4K; ++extern struct inode_operations squashfs_dir_inode_ops; ++#else ++#define SQSH_EXTERN static ++#endif ++ ++#ifdef CONFIG_SQUASHFS_1_0_COMPATIBILITY ++extern int squashfs_1_0_supported(struct squashfs_sb_info *msblk); ++#else ++static inline int squashfs_1_0_supported(struct squashfs_sb_info *msblk) ++{ ++ return 0; ++} ++#endif ++ ++#ifdef CONFIG_SQUASHFS_2_0_COMPATIBILITY ++extern int squashfs_2_0_supported(struct squashfs_sb_info *msblk); ++#else ++static inline int squashfs_2_0_supported(struct squashfs_sb_info *msblk) ++{ ++ return 0; ++} ++#endif +diff --git a/fs/squashfs/squashfs2_0.c b/fs/squashfs/squashfs2_0.c +new file mode 100644 +index 0000000..d8d9d55 +--- /dev/null ++++ b/fs/squashfs/squashfs2_0.c +@@ -0,0 +1,742 @@ ++/* ++ * Squashfs - a compressed read only filesystem for Linux ++ * ++ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007 ++ * Phillip Lougher <phillip@lougher.org.uk> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2, ++ * or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * squashfs2_0.c ++ */ ++ ++#include <linux/squashfs_fs.h> ++#include <linux/module.h> ++#include <linux/zlib.h> ++#include <linux/fs.h> ++#include <linux/squashfs_fs_sb.h> ++#include <linux/squashfs_fs_i.h> ++ ++#include "squashfs.h" ++static int squashfs_readdir_2(struct file *file, void *dirent, filldir_t filldir); ++static struct dentry *squashfs_lookup_2(struct inode *, struct dentry *, ++ struct nameidata *); ++ ++static struct file_operations squashfs_dir_ops_2 = { ++ .read = generic_read_dir, ++ .readdir = squashfs_readdir_2 ++}; ++ ++static struct inode_operations squashfs_dir_inode_ops_2 = { ++ .lookup = squashfs_lookup_2 ++}; ++ ++static unsigned char squashfs_filetype_table[] = { ++ DT_UNKNOWN, DT_DIR, DT_REG, DT_LNK, DT_BLK, DT_CHR, DT_FIFO, DT_SOCK ++}; ++ ++static int read_fragment_index_table_2(struct super_block *s) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ ++ if (!(msblk->fragment_index_2 = kmalloc(SQUASHFS_FRAGMENT_INDEX_BYTES_2 ++ (sblk->fragments), GFP_KERNEL))) { ++ ERROR("Failed to allocate uid/gid table\n"); ++ return 0; ++ } ++ ++ if (SQUASHFS_FRAGMENT_INDEX_BYTES_2(sblk->fragments) && ++ !squashfs_read_data(s, (char *) ++ msblk->fragment_index_2, ++ sblk->fragment_table_start, ++ SQUASHFS_FRAGMENT_INDEX_BYTES_2 ++ (sblk->fragments) | ++ SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, SQUASHFS_FRAGMENT_INDEX_BYTES_2(sblk->fragments))) { ++ ERROR("unable to read fragment index table\n"); ++ return 0; ++ } ++ ++ if (msblk->swap) { ++ int i; ++ unsigned int fragment; ++ ++ for (i = 0; i < SQUASHFS_FRAGMENT_INDEXES_2(sblk->fragments); ++ i++) { ++ SQUASHFS_SWAP_FRAGMENT_INDEXES_2((&fragment), ++ &msblk->fragment_index_2[i], 1); ++ msblk->fragment_index_2[i] = fragment; ++ } ++ } ++ ++ return 1; ++} ++ ++ ++static int get_fragment_location_2(struct super_block *s, unsigned int fragment, ++ long long *fragment_start_block, ++ unsigned int *fragment_size) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ long long start_block = ++ msblk->fragment_index_2[SQUASHFS_FRAGMENT_INDEX_2(fragment)]; ++ int offset = SQUASHFS_FRAGMENT_INDEX_OFFSET_2(fragment); ++ struct squashfs_fragment_entry_2 fragment_entry; ++ ++ if (msblk->swap) { ++ struct squashfs_fragment_entry_2 sfragment_entry; ++ ++ if (!squashfs_get_cached_block(s, (char *) &sfragment_entry, ++ start_block, offset, ++ sizeof(sfragment_entry), &start_block, ++ &offset)) ++ goto out; ++ SQUASHFS_SWAP_FRAGMENT_ENTRY_2(&fragment_entry, &sfragment_entry); ++ } else ++ if (!squashfs_get_cached_block(s, (char *) &fragment_entry, ++ start_block, offset, ++ sizeof(fragment_entry), &start_block, ++ &offset)) ++ goto out; ++ ++ *fragment_start_block = fragment_entry.start_block; ++ *fragment_size = fragment_entry.size; ++ ++ return 1; ++ ++out: ++ return 0; ++} ++ ++ ++static void squashfs_new_inode(struct squashfs_sb_info *msblk, struct inode *i, ++ struct squashfs_base_inode_header_2 *inodeb, unsigned int ino) ++{ ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ ++ i->i_ino = ino; ++ i->i_mtime.tv_sec = sblk->mkfs_time; ++ i->i_atime.tv_sec = sblk->mkfs_time; ++ i->i_ctime.tv_sec = sblk->mkfs_time; ++ i->i_uid = msblk->uid[inodeb->uid]; ++ i->i_mode = inodeb->mode; ++ i->i_nlink = 1; ++ i->i_size = 0; ++ if (inodeb->guid == SQUASHFS_GUIDS) 
++ i->i_gid = i->i_uid; ++ else ++ i->i_gid = msblk->guid[inodeb->guid]; ++} ++ ++ ++static int squashfs_read_inode_2(struct inode *i, squashfs_inode_t inode) ++{ ++ struct super_block *s = i->i_sb; ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ unsigned int block = SQUASHFS_INODE_BLK(inode) + ++ sblk->inode_table_start; ++ unsigned int offset = SQUASHFS_INODE_OFFSET(inode); ++ unsigned int ino = i->i_ino; ++ long long next_block; ++ unsigned int next_offset; ++ union squashfs_inode_header_2 id, sid; ++ struct squashfs_base_inode_header_2 *inodeb = &id.base, ++ *sinodeb = &sid.base; ++ ++ TRACE("Entered squashfs_iget\n"); ++ ++ if (msblk->swap) { ++ if (!squashfs_get_cached_block(s, (char *) sinodeb, block, ++ offset, sizeof(*sinodeb), &next_block, ++ &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_BASE_INODE_HEADER_2(inodeb, sinodeb, ++ sizeof(*sinodeb)); ++ } else ++ if (!squashfs_get_cached_block(s, (char *) inodeb, block, ++ offset, sizeof(*inodeb), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ squashfs_new_inode(msblk, i, inodeb, ino); ++ ++ switch(inodeb->inode_type) { ++ case SQUASHFS_FILE_TYPE: { ++ struct squashfs_reg_inode_header_2 *inodep = &id.reg; ++ struct squashfs_reg_inode_header_2 *sinodep = &sid.reg; ++ long long frag_blk; ++ unsigned int frag_size = 0; ++ ++ if (msblk->swap) { ++ if (!squashfs_get_cached_block(s, (char *) ++ sinodep, block, offset, ++ sizeof(*sinodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_REG_INODE_HEADER_2(inodep, sinodep); ++ } else ++ if (!squashfs_get_cached_block(s, (char *) ++ inodep, block, offset, ++ sizeof(*inodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ frag_blk = SQUASHFS_INVALID_BLK; ++ if (inodep->fragment != SQUASHFS_INVALID_FRAG && ++ !get_fragment_location_2(s, ++ inodep->fragment, &frag_blk, &frag_size)) ++ goto failed_read; ++ ++ i->i_size = inodep->file_size; ++ i->i_fop = &generic_ro_fops; ++ i->i_mode |= S_IFREG; ++ i->i_mtime.tv_sec = inodep->mtime; ++ i->i_atime.tv_sec = inodep->mtime; ++ i->i_ctime.tv_sec = inodep->mtime; ++ i->i_blocks = ((i->i_size - 1) >> 9) + 1; ++ SQUASHFS_I(i)->u.s1.fragment_start_block = frag_blk; ++ SQUASHFS_I(i)->u.s1.fragment_size = frag_size; ++ SQUASHFS_I(i)->u.s1.fragment_offset = inodep->offset; ++ SQUASHFS_I(i)->start_block = inodep->start_block; ++ SQUASHFS_I(i)->u.s1.block_list_start = next_block; ++ SQUASHFS_I(i)->offset = next_offset; ++ if (sblk->block_size > 4096) ++ i->i_data.a_ops = &squashfs_aops; ++ else ++ i->i_data.a_ops = &squashfs_aops_4K; ++ ++ TRACE("File inode %x:%x, start_block %x, " ++ "block_list_start %llx, offset %x\n", ++ SQUASHFS_INODE_BLK(inode), offset, ++ inodep->start_block, next_block, ++ next_offset); ++ break; ++ } ++ case SQUASHFS_DIR_TYPE: { ++ struct squashfs_dir_inode_header_2 *inodep = &id.dir; ++ struct squashfs_dir_inode_header_2 *sinodep = &sid.dir; ++ ++ if (msblk->swap) { ++ if (!squashfs_get_cached_block(s, (char *) ++ sinodep, block, offset, ++ sizeof(*sinodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_DIR_INODE_HEADER_2(inodep, sinodep); ++ } else ++ if (!squashfs_get_cached_block(s, (char *) ++ inodep, block, offset, ++ sizeof(*inodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ i->i_size = inodep->file_size; ++ i->i_op = &squashfs_dir_inode_ops_2; ++ i->i_fop = &squashfs_dir_ops_2; ++ i->i_mode |= S_IFDIR; ++ i->i_mtime.tv_sec = inodep->mtime; ++ i->i_atime.tv_sec = inodep->mtime; ++ 
i->i_ctime.tv_sec = inodep->mtime; ++ SQUASHFS_I(i)->start_block = inodep->start_block; ++ SQUASHFS_I(i)->offset = inodep->offset; ++ SQUASHFS_I(i)->u.s2.directory_index_count = 0; ++ SQUASHFS_I(i)->u.s2.parent_inode = 0; ++ ++ TRACE("Directory inode %x:%x, start_block %x, offset " ++ "%x\n", SQUASHFS_INODE_BLK(inode), ++ offset, inodep->start_block, ++ inodep->offset); ++ break; ++ } ++ case SQUASHFS_LDIR_TYPE: { ++ struct squashfs_ldir_inode_header_2 *inodep = &id.ldir; ++ struct squashfs_ldir_inode_header_2 *sinodep = &sid.ldir; ++ ++ if (msblk->swap) { ++ if (!squashfs_get_cached_block(s, (char *) ++ sinodep, block, offset, ++ sizeof(*sinodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_LDIR_INODE_HEADER_2(inodep, ++ sinodep); ++ } else ++ if (!squashfs_get_cached_block(s, (char *) ++ inodep, block, offset, ++ sizeof(*inodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ i->i_size = inodep->file_size; ++ i->i_op = &squashfs_dir_inode_ops_2; ++ i->i_fop = &squashfs_dir_ops_2; ++ i->i_mode |= S_IFDIR; ++ i->i_mtime.tv_sec = inodep->mtime; ++ i->i_atime.tv_sec = inodep->mtime; ++ i->i_ctime.tv_sec = inodep->mtime; ++ SQUASHFS_I(i)->start_block = inodep->start_block; ++ SQUASHFS_I(i)->offset = inodep->offset; ++ SQUASHFS_I(i)->u.s2.directory_index_start = next_block; ++ SQUASHFS_I(i)->u.s2.directory_index_offset = ++ next_offset; ++ SQUASHFS_I(i)->u.s2.directory_index_count = ++ inodep->i_count; ++ SQUASHFS_I(i)->u.s2.parent_inode = 0; ++ ++ TRACE("Long directory inode %x:%x, start_block %x, " ++ "offset %x\n", ++ SQUASHFS_INODE_BLK(inode), offset, ++ inodep->start_block, inodep->offset); ++ break; ++ } ++ case SQUASHFS_SYMLINK_TYPE: { ++ struct squashfs_symlink_inode_header_2 *inodep = ++ &id.symlink; ++ struct squashfs_symlink_inode_header_2 *sinodep = ++ &sid.symlink; ++ ++ if (msblk->swap) { ++ if (!squashfs_get_cached_block(s, (char *) ++ sinodep, block, offset, ++ sizeof(*sinodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_SYMLINK_INODE_HEADER_2(inodep, ++ sinodep); ++ } else ++ if (!squashfs_get_cached_block(s, (char *) ++ inodep, block, offset, ++ sizeof(*inodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ i->i_size = inodep->symlink_size; ++ i->i_op = &page_symlink_inode_operations; ++ i->i_data.a_ops = &squashfs_symlink_aops; ++ i->i_mode |= S_IFLNK; ++ SQUASHFS_I(i)->start_block = next_block; ++ SQUASHFS_I(i)->offset = next_offset; ++ ++ TRACE("Symbolic link inode %x:%x, start_block %llx, " ++ "offset %x\n", ++ SQUASHFS_INODE_BLK(inode), offset, ++ next_block, next_offset); ++ break; ++ } ++ case SQUASHFS_BLKDEV_TYPE: ++ case SQUASHFS_CHRDEV_TYPE: { ++ struct squashfs_dev_inode_header_2 *inodep = &id.dev; ++ struct squashfs_dev_inode_header_2 *sinodep = &sid.dev; ++ ++ if (msblk->swap) { ++ if (!squashfs_get_cached_block(s, (char *) ++ sinodep, block, offset, ++ sizeof(*sinodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_DEV_INODE_HEADER_2(inodep, sinodep); ++ } else ++ if (!squashfs_get_cached_block(s, (char *) ++ inodep, block, offset, ++ sizeof(*inodep), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ i->i_mode |= (inodeb->inode_type == ++ SQUASHFS_CHRDEV_TYPE) ? 
S_IFCHR : ++ S_IFBLK; ++ init_special_inode(i, i->i_mode, ++ old_decode_dev(inodep->rdev)); ++ ++ TRACE("Device inode %x:%x, rdev %x\n", ++ SQUASHFS_INODE_BLK(inode), offset, ++ inodep->rdev); ++ break; ++ } ++ case SQUASHFS_FIFO_TYPE: ++ case SQUASHFS_SOCKET_TYPE: { ++ ++ i->i_mode |= (inodeb->inode_type == SQUASHFS_FIFO_TYPE) ++ ? S_IFIFO : S_IFSOCK; ++ init_special_inode(i, i->i_mode, 0); ++ break; ++ } ++ default: ++ ERROR("Unknown inode type %d in squashfs_iget!\n", ++ inodeb->inode_type); ++ goto failed_read1; ++ } ++ ++ return 1; ++ ++failed_read: ++ ERROR("Unable to read inode [%x:%x]\n", block, offset); ++ ++failed_read1: ++ return 0; ++} ++ ++ ++static int get_dir_index_using_offset(struct super_block *s, long long ++ *next_block, unsigned int *next_offset, ++ long long index_start, ++ unsigned int index_offset, int i_count, ++ long long f_pos) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ int i, length = 0; ++ struct squashfs_dir_index_2 index; ++ ++ TRACE("Entered get_dir_index_using_offset, i_count %d, f_pos %d\n", ++ i_count, (unsigned int) f_pos); ++ ++ if (f_pos == 0) ++ goto finish; ++ ++ for (i = 0; i < i_count; i++) { ++ if (msblk->swap) { ++ struct squashfs_dir_index_2 sindex; ++ squashfs_get_cached_block(s, (char *) &sindex, ++ index_start, index_offset, ++ sizeof(sindex), &index_start, ++ &index_offset); ++ SQUASHFS_SWAP_DIR_INDEX_2(&index, &sindex); ++ } else ++ squashfs_get_cached_block(s, (char *) &index, ++ index_start, index_offset, ++ sizeof(index), &index_start, ++ &index_offset); ++ ++ if (index.index > f_pos) ++ break; ++ ++ squashfs_get_cached_block(s, NULL, index_start, index_offset, ++ index.size + 1, &index_start, ++ &index_offset); ++ ++ length = index.index; ++ *next_block = index.start_block + sblk->directory_table_start; ++ } ++ ++ *next_offset = (length + *next_offset) % SQUASHFS_METADATA_SIZE; ++ ++finish: ++ return length; ++} ++ ++ ++static int get_dir_index_using_name(struct super_block *s, long long ++ *next_block, unsigned int *next_offset, ++ long long index_start, ++ unsigned int index_offset, int i_count, ++ const char *name, int size) ++{ ++ struct squashfs_sb_info *msblk = s->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ int i, length = 0; ++ struct squashfs_dir_index_2 *index; ++ char *str; ++ ++ TRACE("Entered get_dir_index_using_name, i_count %d\n", i_count); ++ ++ if (!(str = kmalloc(sizeof(struct squashfs_dir_index) + ++ (SQUASHFS_NAME_LEN + 1) * 2, GFP_KERNEL))) { ++ ERROR("Failed to allocate squashfs_dir_index\n"); ++ goto failure; ++ } ++ ++ index = (struct squashfs_dir_index_2 *) (str + SQUASHFS_NAME_LEN + 1); ++ strncpy(str, name, size); ++ str[size] = '\0'; ++ ++ for (i = 0; i < i_count; i++) { ++ if (msblk->swap) { ++ struct squashfs_dir_index_2 sindex; ++ squashfs_get_cached_block(s, (char *) &sindex, ++ index_start, index_offset, ++ sizeof(sindex), &index_start, ++ &index_offset); ++ SQUASHFS_SWAP_DIR_INDEX_2(index, &sindex); ++ } else ++ squashfs_get_cached_block(s, (char *) index, ++ index_start, index_offset, ++ sizeof(struct squashfs_dir_index_2), ++ &index_start, &index_offset); ++ ++ squashfs_get_cached_block(s, index->name, index_start, ++ index_offset, index->size + 1, ++ &index_start, &index_offset); ++ ++ index->name[index->size + 1] = '\0'; ++ ++ if (strcmp(index->name, str) > 0) ++ break; ++ ++ length = index->index; ++ *next_block = index->start_block + sblk->directory_table_start; ++ } ++ ++ *next_offset = (length + 
*next_offset) % SQUASHFS_METADATA_SIZE; ++ kfree(str); ++failure: ++ return length; ++} ++ ++ ++static int squashfs_readdir_2(struct file *file, void *dirent, filldir_t filldir) ++{ ++ struct inode *i = file->f_dentry->d_inode; ++ struct squashfs_sb_info *msblk = i->i_sb->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ long long next_block = SQUASHFS_I(i)->start_block + ++ sblk->directory_table_start; ++ int next_offset = SQUASHFS_I(i)->offset, length = 0, ++ dir_count; ++ struct squashfs_dir_header_2 dirh; ++ struct squashfs_dir_entry_2 *dire; ++ ++ TRACE("Entered squashfs_readdir_2 [%llx:%x]\n", next_block, next_offset); ++ ++ if (!(dire = kmalloc(sizeof(struct squashfs_dir_entry) + ++ SQUASHFS_NAME_LEN + 1, GFP_KERNEL))) { ++ ERROR("Failed to allocate squashfs_dir_entry\n"); ++ goto finish; ++ } ++ ++ length = get_dir_index_using_offset(i->i_sb, &next_block, &next_offset, ++ SQUASHFS_I(i)->u.s2.directory_index_start, ++ SQUASHFS_I(i)->u.s2.directory_index_offset, ++ SQUASHFS_I(i)->u.s2.directory_index_count, ++ file->f_pos); ++ ++ while (length < i_size_read(i)) { ++ /* read directory header */ ++ if (msblk->swap) { ++ struct squashfs_dir_header_2 sdirh; ++ ++ if (!squashfs_get_cached_block(i->i_sb, (char *) &sdirh, ++ next_block, next_offset, sizeof(sdirh), ++ &next_block, &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(sdirh); ++ SQUASHFS_SWAP_DIR_HEADER_2(&dirh, &sdirh); ++ } else { ++ if (!squashfs_get_cached_block(i->i_sb, (char *) &dirh, ++ next_block, next_offset, sizeof(dirh), ++ &next_block, &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(dirh); ++ } ++ ++ dir_count = dirh.count + 1; ++ while (dir_count--) { ++ if (msblk->swap) { ++ struct squashfs_dir_entry_2 sdire; ++ if (!squashfs_get_cached_block(i->i_sb, (char *) ++ &sdire, next_block, next_offset, ++ sizeof(sdire), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(sdire); ++ SQUASHFS_SWAP_DIR_ENTRY_2(dire, &sdire); ++ } else { ++ if (!squashfs_get_cached_block(i->i_sb, (char *) ++ dire, next_block, next_offset, ++ sizeof(*dire), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(*dire); ++ } ++ ++ if (!squashfs_get_cached_block(i->i_sb, dire->name, ++ next_block, next_offset, ++ dire->size + 1, &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ length += dire->size + 1; ++ ++ if (file->f_pos >= length) ++ continue; ++ ++ dire->name[dire->size + 1] = '\0'; ++ ++ TRACE("Calling filldir(%x, %s, %d, %d, %x:%x, %d)\n", ++ (unsigned int) dirent, dire->name, ++ dire->size + 1, (int) file->f_pos, ++ dirh.start_block, dire->offset, ++ squashfs_filetype_table[dire->type]); ++ ++ if (filldir(dirent, dire->name, dire->size + 1, ++ file->f_pos, SQUASHFS_MK_VFS_INODE( ++ dirh.start_block, dire->offset), ++ squashfs_filetype_table[dire->type]) ++ < 0) { ++ TRACE("Filldir returned less than 0\n"); ++ goto finish; ++ } ++ file->f_pos = length; ++ } ++ } ++ ++finish: ++ kfree(dire); ++ return 0; ++ ++failed_read: ++ ERROR("Unable to read directory block [%llx:%x]\n", next_block, ++ next_offset); ++ kfree(dire); ++ return 0; ++} ++ ++ ++static struct dentry *squashfs_lookup_2(struct inode *i, struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ const unsigned char *name = dentry->d_name.name; ++ int len = dentry->d_name.len; ++ struct inode *inode = NULL; ++ struct squashfs_sb_info *msblk = i->i_sb->s_fs_info; ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ long long next_block = SQUASHFS_I(i)->start_block + ++ 
sblk->directory_table_start; ++ int next_offset = SQUASHFS_I(i)->offset, length = 0, ++ dir_count; ++ struct squashfs_dir_header_2 dirh; ++ struct squashfs_dir_entry_2 *dire; ++ int sorted = sblk->s_major == 2 && sblk->s_minor >= 1; ++ ++ TRACE("Entered squashfs_lookup_2 [%llx:%x]\n", next_block, next_offset); ++ ++ if (!(dire = kmalloc(sizeof(struct squashfs_dir_entry) + ++ SQUASHFS_NAME_LEN + 1, GFP_KERNEL))) { ++ ERROR("Failed to allocate squashfs_dir_entry\n"); ++ goto exit_loop; ++ } ++ ++ if (len > SQUASHFS_NAME_LEN) ++ goto exit_loop; ++ ++ length = get_dir_index_using_name(i->i_sb, &next_block, &next_offset, ++ SQUASHFS_I(i)->u.s2.directory_index_start, ++ SQUASHFS_I(i)->u.s2.directory_index_offset, ++ SQUASHFS_I(i)->u.s2.directory_index_count, name, ++ len); ++ ++ while (length < i_size_read(i)) { ++ /* read directory header */ ++ if (msblk->swap) { ++ struct squashfs_dir_header_2 sdirh; ++ if (!squashfs_get_cached_block(i->i_sb, (char *) &sdirh, ++ next_block, next_offset, sizeof(sdirh), ++ &next_block, &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(sdirh); ++ SQUASHFS_SWAP_DIR_HEADER_2(&dirh, &sdirh); ++ } else { ++ if (!squashfs_get_cached_block(i->i_sb, (char *) &dirh, ++ next_block, next_offset, sizeof(dirh), ++ &next_block, &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(dirh); ++ } ++ ++ dir_count = dirh.count + 1; ++ while (dir_count--) { ++ if (msblk->swap) { ++ struct squashfs_dir_entry_2 sdire; ++ if (!squashfs_get_cached_block(i->i_sb, (char *) ++ &sdire, next_block,next_offset, ++ sizeof(sdire), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(sdire); ++ SQUASHFS_SWAP_DIR_ENTRY_2(dire, &sdire); ++ } else { ++ if (!squashfs_get_cached_block(i->i_sb, (char *) ++ dire, next_block,next_offset, ++ sizeof(*dire), &next_block, ++ &next_offset)) ++ goto failed_read; ++ ++ length += sizeof(*dire); ++ } ++ ++ if (!squashfs_get_cached_block(i->i_sb, dire->name, ++ next_block, next_offset, dire->size + 1, ++ &next_block, &next_offset)) ++ goto failed_read; ++ ++ length += dire->size + 1; ++ ++ if (sorted && name[0] < dire->name[0]) ++ goto exit_loop; ++ ++ if ((len == dire->size + 1) && !strncmp(name, ++ dire->name, len)) { ++ squashfs_inode_t ino = ++ SQUASHFS_MKINODE(dirh.start_block, ++ dire->offset); ++ unsigned int inode_number = SQUASHFS_MK_VFS_INODE(dirh.start_block, ++ dire->offset); ++ ++ TRACE("calling squashfs_iget for directory " ++ "entry %s, inode %x:%x, %lld\n", name, ++ dirh.start_block, dire->offset, ino); ++ ++ inode = squashfs_iget(i->i_sb, ino, inode_number); ++ ++ goto exit_loop; ++ } ++ } ++ } ++ ++exit_loop: ++ kfree(dire); ++ d_add(dentry, inode); ++ return ERR_PTR(0); ++ ++failed_read: ++ ERROR("Unable to read directory block [%llx:%x]\n", next_block, ++ next_offset); ++ goto exit_loop; ++} ++ ++ ++int squashfs_2_0_supported(struct squashfs_sb_info *msblk) ++{ ++ struct squashfs_super_block *sblk = &msblk->sblk; ++ ++ msblk->read_inode = squashfs_read_inode_2; ++ msblk->read_fragment_index_table = read_fragment_index_table_2; ++ ++ sblk->bytes_used = sblk->bytes_used_2; ++ sblk->uid_start = sblk->uid_start_2; ++ sblk->guid_start = sblk->guid_start_2; ++ sblk->inode_table_start = sblk->inode_table_start_2; ++ sblk->directory_table_start = sblk->directory_table_start_2; ++ sblk->fragment_table_start = sblk->fragment_table_start_2; ++ ++ return 1; ++} +diff --git a/include/linux/aufs_type.h b/include/linux/aufs_type.h +new file mode 100755 +index 0000000..8b4629e +--- /dev/null ++++ b/include/linux/aufs_type.h 
+@@ -0,0 +1,97 @@ ++/* ++ * Copyright (C) 2005, 2006, 2007 Junjiro Okajima ++ * ++ * This program, aufs is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/* $Id: aufs_type.h,v 1.55 2007/05/14 03:40:57 sfjro Exp $ */ ++ ++#include <linux/ioctl.h> ++ ++#ifndef __AUFS_TYPE_H__ ++#define __AUFS_TYPE_H__ ++ ++#define AUFS_VERSION "20070514" ++ ++/* ---------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_AUFS_BRANCH_MAX_127 ++typedef char aufs_bindex_t; ++#define AUFS_BRANCH_MAX 127 ++#else ++typedef short aufs_bindex_t; ++#ifdef CONFIG_AUFS_BRANCH_MAX_511 ++#define AUFS_BRANCH_MAX 511 ++#elif defined(CONFIG_AUFS_BRANCH_MAX_1023) ++#define AUFS_BRANCH_MAX 1023 ++#elif defined(CONFIG_AUFS_BRANCH_MAX_32767) ++#define AUFS_BRANCH_MAX 32767 ++#else ++#error unknown CONFIG_AUFS_BRANCH_MAX value ++#endif ++#endif ++ ++#define AUFS_NAME "aufs" ++#define AUFS_FSTYPE AUFS_NAME ++ ++#define AUFS_ROOT_INO 2 ++#define AUFS_FIRST_INO 11 ++ ++#define AUFS_WH_PFX ".wh." ++#define AUFS_WH_PFX_LEN ((int)sizeof(AUFS_WH_PFX) - 1) ++#define AUFS_XINO_FNAME "." AUFS_NAME ".xino" ++#define AUFS_XINO_DEFPATH "/tmp/" AUFS_XINO_FNAME ++#define AUFS_DIRWH_DEF 3 ++#define AUFS_RDCACHE_DEF 10 /* seconds */ ++#define AUFS_WKQ_NAME AUFS_NAME "d" ++#define AUFS_NWKQ_DEF 4 ++ ++#ifdef CONFIG_AUFS_COMPAT ++#define AUFS_DIROPQ_NAME "__dir_opaque" ++#else ++#define AUFS_DIROPQ_NAME AUFS_WH_PFX ".opq" /* whiteouted doubly */ ++#endif ++#define AUFS_WH_DIROPQ AUFS_WH_PFX AUFS_DIROPQ_NAME ++ ++/* will be whiteouted doubly */ ++#define AUFS_WH_BASENAME AUFS_WH_PFX AUFS_NAME ++#define AUFS_WH_PLINKDIR AUFS_WH_PFX "plink" ++ ++/* ---------------------------------------------------------------------- */ ++ ++/* ioctl */ ++enum {AuCtlErr, AuCtlErr_Last}; ++enum { ++ AuCtl_REFRESH, //AuCtl_REFRESHV, ++ //AuCtl_FLUSH_PLINK, ++ //AuCtl_CPUP, ++ AuCtl_CPDOWN, AuCtl_MVDOWN ++}; ++ ++struct aufs_ctl_cp { ++ int bsrc, bdst; ++ int err; ++}; ++ ++#define Type 'A' ++#define AUFS_CTL_REFRESH _IO(Type, AuCtl_REFRESH) ++//#define AUFS_CTL_REFRESHV _IO(Type, AuCtl_REFRESHV) ++//#define AUFS_CTL_FLUSH_PLINK _IOR(Type, AuCtl_FLUSH_PLINK) ++//#define AUFS_CTL_CPUP _IOWR(Type, AuCtl_CPUP, struct aufs_ctl_cp) ++#define AUFS_CTL_CPDOWN _IOWR(Type, AuCtl_CPDOWN, struct aufs_ctl_cp) ++#define AUFS_CTL_MVDOWN _IOWR(Type, AuCtl_MVDOWN, struct aufs_ctl_cp) ++#undef Type ++ ++#endif /* __AUFS_TYPE_H__ */ +diff --git a/include/linux/squashfs_fs.h b/include/linux/squashfs_fs.h +new file mode 100644 +index 0000000..a9380ad +--- /dev/null ++++ b/include/linux/squashfs_fs.h +@@ -0,0 +1,934 @@ ++#ifndef SQUASHFS_FS ++#define SQUASHFS_FS ++ ++/* ++ * Squashfs ++ * ++ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007 ++ * Phillip Lougher <phillip@lougher.org.uk> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of 
the GNU General Public License ++ * as published by the Free Software Foundation; either version 2, ++ * or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * squashfs_fs.h ++ */ ++ ++#ifndef CONFIG_SQUASHFS_2_0_COMPATIBILITY ++#define CONFIG_SQUASHFS_2_0_COMPATIBILITY ++#endif ++ ++#ifdef CONFIG_SQUASHFS_VMALLOC ++#define SQUASHFS_ALLOC(a) vmalloc(a) ++#define SQUASHFS_FREE(a) vfree(a) ++#else ++#define SQUASHFS_ALLOC(a) kmalloc(a, GFP_KERNEL) ++#define SQUASHFS_FREE(a) kfree(a) ++#endif ++#define SQUASHFS_CACHED_FRAGMENTS CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE ++#define SQUASHFS_MAJOR 3 ++#define SQUASHFS_MINOR 0 ++#define SQUASHFS_MAGIC 0x73717368 ++#define SQUASHFS_MAGIC_SWAP 0x68737173 ++#define SQUASHFS_START 0 ++ ++/* size of metadata (inode and directory) blocks */ ++#define SQUASHFS_METADATA_SIZE 8192 ++#define SQUASHFS_METADATA_LOG 13 ++ ++/* default size of data blocks */ ++#define SQUASHFS_FILE_SIZE 65536 ++#define SQUASHFS_FILE_LOG 16 ++ ++#define SQUASHFS_FILE_MAX_SIZE 65536 ++ ++/* Max number of uids and gids */ ++#define SQUASHFS_UIDS 256 ++#define SQUASHFS_GUIDS 255 ++ ++/* Max length of filename (not 255) */ ++#define SQUASHFS_NAME_LEN 256 ++ ++#define SQUASHFS_INVALID ((long long) 0xffffffffffff) ++#define SQUASHFS_INVALID_FRAG ((unsigned int) 0xffffffff) ++#define SQUASHFS_INVALID_BLK ((long long) -1) ++#define SQUASHFS_USED_BLK ((long long) -2) ++ ++/* Filesystem flags */ ++#define SQUASHFS_NOI 0 ++#define SQUASHFS_NOD 1 ++#define SQUASHFS_CHECK 2 ++#define SQUASHFS_NOF 3 ++#define SQUASHFS_NO_FRAG 4 ++#define SQUASHFS_ALWAYS_FRAG 5 ++#define SQUASHFS_DUPLICATE 6 ++#define SQUASHFS_EXPORT 7 ++ ++#define SQUASHFS_BIT(flag, bit) ((flag >> bit) & 1) ++ ++#define SQUASHFS_UNCOMPRESSED_INODES(flags) SQUASHFS_BIT(flags, \ ++ SQUASHFS_NOI) ++ ++#define SQUASHFS_UNCOMPRESSED_DATA(flags) SQUASHFS_BIT(flags, \ ++ SQUASHFS_NOD) ++ ++#define SQUASHFS_UNCOMPRESSED_FRAGMENTS(flags) SQUASHFS_BIT(flags, \ ++ SQUASHFS_NOF) ++ ++#define SQUASHFS_NO_FRAGMENTS(flags) SQUASHFS_BIT(flags, \ ++ SQUASHFS_NO_FRAG) ++ ++#define SQUASHFS_ALWAYS_FRAGMENTS(flags) SQUASHFS_BIT(flags, \ ++ SQUASHFS_ALWAYS_FRAG) ++ ++#define SQUASHFS_DUPLICATES(flags) SQUASHFS_BIT(flags, \ ++ SQUASHFS_DUPLICATE) ++ ++#define SQUASHFS_EXPORTABLE(flags) SQUASHFS_BIT(flags, \ ++ SQUASHFS_EXPORT) ++ ++#define SQUASHFS_CHECK_DATA(flags) SQUASHFS_BIT(flags, \ ++ SQUASHFS_CHECK) ++ ++#define SQUASHFS_MKFLAGS(noi, nod, check_data, nof, no_frag, always_frag, \ ++ duplicate_checking, exortable) (noi | (nod << 1) | (check_data << 2) \ ++ | (nof << 3) | (no_frag << 4) | (always_frag << 5) | \ ++ (duplicate_checking << 6) | (exportable << 7)) ++ ++/* Max number of types and file types */ ++#define SQUASHFS_DIR_TYPE 1 ++#define SQUASHFS_FILE_TYPE 2 ++#define SQUASHFS_SYMLINK_TYPE 3 ++#define SQUASHFS_BLKDEV_TYPE 4 ++#define SQUASHFS_CHRDEV_TYPE 5 ++#define SQUASHFS_FIFO_TYPE 6 ++#define SQUASHFS_SOCKET_TYPE 7 ++#define SQUASHFS_LDIR_TYPE 8 ++#define SQUASHFS_LREG_TYPE 9 ++ ++/* 1.0 filesystem type definitions */ ++#define SQUASHFS_TYPES 5 ++#define SQUASHFS_IPC_TYPE 0 ++ 
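The flag helpers defined just above (SQUASHFS_BIT, SQUASHFS_UNCOMPRESSED_INODES and friends, SQUASHFS_MKFLAGS) pack eight boolean filesystem properties into a single byte of the superblock. The stand-alone user-space sketch below is illustrative only and not part of the patch; the macros are re-stated so it compiles on its own, and because the header above declares the last SQUASHFS_MKFLAGS parameter as "exortable" while the macro body uses "exportable", the copy here uses the corrected spelling.

/*
 * Illustrative sketch (not part of the patch): build and query a squashfs
 * superblock flags byte with the helpers defined above.
 */
#include <stdio.h>

#define SQUASHFS_NOI		0
#define SQUASHFS_NOD		1
#define SQUASHFS_CHECK		2
#define SQUASHFS_NOF		3
#define SQUASHFS_NO_FRAG	4
#define SQUASHFS_ALWAYS_FRAG	5
#define SQUASHFS_DUPLICATE	6
#define SQUASHFS_EXPORT		7

#define SQUASHFS_BIT(flag, bit)	(((flag) >> (bit)) & 1)

#define SQUASHFS_MKFLAGS(noi, nod, check_data, nof, no_frag, always_frag, \
		duplicate_checking, exportable) ((noi) | ((nod) << 1) | \
		((check_data) << 2) | ((nof) << 3) | ((no_frag) << 4) | \
		((always_frag) << 5) | ((duplicate_checking) << 6) | \
		((exportable) << 7))

int main(void)
{
	/* uncompressed inodes, duplicate checking on, exportable */
	unsigned int flags = SQUASHFS_MKFLAGS(1, 0, 0, 0, 0, 0, 1, 1);

	printf("flags               = 0x%02x\n", flags);	/* prints 0xc1 */
	printf("uncompressed inodes = %d\n", SQUASHFS_BIT(flags, SQUASHFS_NOI));
	printf("uncompressed data   = %d\n", SQUASHFS_BIT(flags, SQUASHFS_NOD));
	printf("exportable          = %d\n", SQUASHFS_BIT(flags, SQUASHFS_EXPORT));
	return 0;
}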
++/* Flag whether block is compressed or uncompressed, bit is set if block is ++ * uncompressed */ ++#define SQUASHFS_COMPRESSED_BIT (1 << 15) ++ ++#define SQUASHFS_COMPRESSED_SIZE(B) (((B) & ~SQUASHFS_COMPRESSED_BIT) ? \ ++ (B) & ~SQUASHFS_COMPRESSED_BIT : SQUASHFS_COMPRESSED_BIT) ++ ++#define SQUASHFS_COMPRESSED(B) (!((B) & SQUASHFS_COMPRESSED_BIT)) ++ ++#define SQUASHFS_COMPRESSED_BIT_BLOCK (1 << 24) ++ ++#define SQUASHFS_COMPRESSED_SIZE_BLOCK(B) (((B) & \ ++ ~SQUASHFS_COMPRESSED_BIT_BLOCK) ? (B) & \ ++ ~SQUASHFS_COMPRESSED_BIT_BLOCK : SQUASHFS_COMPRESSED_BIT_BLOCK) ++ ++#define SQUASHFS_COMPRESSED_BLOCK(B) (!((B) & SQUASHFS_COMPRESSED_BIT_BLOCK)) ++ ++/* ++ * Inode number ops. Inodes consist of a compressed block number, and an ++ * uncompressed offset within that block ++ */ ++#define SQUASHFS_INODE_BLK(a) ((unsigned int) ((a) >> 16)) ++ ++#define SQUASHFS_INODE_OFFSET(a) ((unsigned int) ((a) & 0xffff)) ++ ++#define SQUASHFS_MKINODE(A, B) ((squashfs_inode_t)(((squashfs_inode_t) (A)\ ++ << 16) + (B))) ++ ++/* Compute 32 bit VFS inode number from squashfs inode number */ ++#define SQUASHFS_MK_VFS_INODE(a, b) ((unsigned int) (((a) << 8) + \ ++ ((b) >> 2) + 1)) ++/* XXX */ ++ ++/* Translate between VFS mode and squashfs mode */ ++#define SQUASHFS_MODE(a) ((a) & 0xfff) ++ ++/* fragment and fragment table defines */ ++#define SQUASHFS_FRAGMENT_BYTES(A) ((A) * sizeof(struct squashfs_fragment_entry)) ++ ++#define SQUASHFS_FRAGMENT_INDEX(A) (SQUASHFS_FRAGMENT_BYTES(A) / \ ++ SQUASHFS_METADATA_SIZE) ++ ++#define SQUASHFS_FRAGMENT_INDEX_OFFSET(A) (SQUASHFS_FRAGMENT_BYTES(A) % \ ++ SQUASHFS_METADATA_SIZE) ++ ++#define SQUASHFS_FRAGMENT_INDEXES(A) ((SQUASHFS_FRAGMENT_BYTES(A) + \ ++ SQUASHFS_METADATA_SIZE - 1) / \ ++ SQUASHFS_METADATA_SIZE) ++ ++#define SQUASHFS_FRAGMENT_INDEX_BYTES(A) (SQUASHFS_FRAGMENT_INDEXES(A) *\ ++ sizeof(long long)) ++ ++/* inode lookup table defines */ ++#define SQUASHFS_LOOKUP_BYTES(A) ((A) * sizeof(squashfs_inode_t)) ++ ++#define SQUASHFS_LOOKUP_BLOCK(A) (SQUASHFS_LOOKUP_BYTES(A) / \ ++ SQUASHFS_METADATA_SIZE) ++ ++#define SQUASHFS_LOOKUP_BLOCK_OFFSET(A) (SQUASHFS_LOOKUP_BYTES(A) % \ ++ SQUASHFS_METADATA_SIZE) ++ ++#define SQUASHFS_LOOKUP_BLOCKS(A) ((SQUASHFS_LOOKUP_BYTES(A) + \ ++ SQUASHFS_METADATA_SIZE - 1) / \ ++ SQUASHFS_METADATA_SIZE) ++ ++#define SQUASHFS_LOOKUP_BLOCK_BYTES(A) (SQUASHFS_LOOKUP_BLOCKS(A) *\ ++ sizeof(long long)) ++ ++/* cached data constants for filesystem */ ++#define SQUASHFS_CACHED_BLKS 8 ++ ++#define SQUASHFS_MAX_FILE_SIZE_LOG 64 ++ ++#define SQUASHFS_MAX_FILE_SIZE ((long long) 1 << \ ++ (SQUASHFS_MAX_FILE_SIZE_LOG - 2)) ++ ++#define SQUASHFS_MARKER_BYTE 0xff ++ ++/* meta index cache */ ++#define SQUASHFS_META_INDEXES (SQUASHFS_METADATA_SIZE / sizeof(unsigned int)) ++#define SQUASHFS_META_ENTRIES 31 ++#define SQUASHFS_META_NUMBER 8 ++#define SQUASHFS_SLOTS 4 ++ ++struct meta_entry { ++ long long data_block; ++ unsigned int index_block; ++ unsigned short offset; ++ unsigned short pad; ++}; ++ ++struct meta_index { ++ unsigned int inode_number; ++ unsigned int offset; ++ unsigned short entries; ++ unsigned short skip; ++ unsigned short locked; ++ unsigned short pad; ++ struct meta_entry meta_entry[SQUASHFS_META_ENTRIES]; ++}; ++ ++ ++/* ++ * definitions for structures on disk ++ */ ++ ++typedef long long squashfs_block_t; ++typedef long long squashfs_inode_t; ++ ++struct squashfs_super_block { ++ unsigned int s_magic; ++ unsigned int inodes; ++ unsigned int bytes_used_2; ++ unsigned int uid_start_2; ++ unsigned int guid_start_2; ++ unsigned int 
inode_table_start_2; ++ unsigned int directory_table_start_2; ++ unsigned int s_major:16; ++ unsigned int s_minor:16; ++ unsigned int block_size_1:16; ++ unsigned int block_log:16; ++ unsigned int flags:8; ++ unsigned int no_uids:8; ++ unsigned int no_guids:8; ++ unsigned int mkfs_time /* time of filesystem creation */; ++ squashfs_inode_t root_inode; ++ unsigned int block_size; ++ unsigned int fragments; ++ unsigned int fragment_table_start_2; ++ long long bytes_used; ++ long long uid_start; ++ long long guid_start; ++ long long inode_table_start; ++ long long directory_table_start; ++ long long fragment_table_start; ++ long long lookup_table_start; ++} __attribute__ ((packed)); ++ ++struct squashfs_dir_index { ++ unsigned int index; ++ unsigned int start_block; ++ unsigned char size; ++ unsigned char name[0]; ++} __attribute__ ((packed)); ++ ++#define SQUASHFS_BASE_INODE_HEADER \ ++ unsigned int inode_type:4; \ ++ unsigned int mode:12; \ ++ unsigned int uid:8; \ ++ unsigned int guid:8; \ ++ unsigned int mtime; \ ++ unsigned int inode_number; ++ ++struct squashfs_base_inode_header { ++ SQUASHFS_BASE_INODE_HEADER; ++} __attribute__ ((packed)); ++ ++struct squashfs_ipc_inode_header { ++ SQUASHFS_BASE_INODE_HEADER; ++ unsigned int nlink; ++} __attribute__ ((packed)); ++ ++struct squashfs_dev_inode_header { ++ SQUASHFS_BASE_INODE_HEADER; ++ unsigned int nlink; ++ unsigned short rdev; ++} __attribute__ ((packed)); ++ ++struct squashfs_symlink_inode_header { ++ SQUASHFS_BASE_INODE_HEADER; ++ unsigned int nlink; ++ unsigned short symlink_size; ++ char symlink[0]; ++} __attribute__ ((packed)); ++ ++struct squashfs_reg_inode_header { ++ SQUASHFS_BASE_INODE_HEADER; ++ squashfs_block_t start_block; ++ unsigned int fragment; ++ unsigned int offset; ++ unsigned int file_size; ++ unsigned short block_list[0]; ++} __attribute__ ((packed)); ++ ++struct squashfs_lreg_inode_header { ++ SQUASHFS_BASE_INODE_HEADER; ++ unsigned int nlink; ++ squashfs_block_t start_block; ++ unsigned int fragment; ++ unsigned int offset; ++ long long file_size; ++ unsigned short block_list[0]; ++} __attribute__ ((packed)); ++ ++struct squashfs_dir_inode_header { ++ SQUASHFS_BASE_INODE_HEADER; ++ unsigned int nlink; ++ unsigned int file_size:19; ++ unsigned int offset:13; ++ unsigned int start_block; ++ unsigned int parent_inode; ++} __attribute__ ((packed)); ++ ++struct squashfs_ldir_inode_header { ++ SQUASHFS_BASE_INODE_HEADER; ++ unsigned int nlink; ++ unsigned int file_size:27; ++ unsigned int offset:13; ++ unsigned int start_block; ++ unsigned int i_count:16; ++ unsigned int parent_inode; ++ struct squashfs_dir_index index[0]; ++} __attribute__ ((packed)); ++ ++union squashfs_inode_header { ++ struct squashfs_base_inode_header base; ++ struct squashfs_dev_inode_header dev; ++ struct squashfs_symlink_inode_header symlink; ++ struct squashfs_reg_inode_header reg; ++ struct squashfs_lreg_inode_header lreg; ++ struct squashfs_dir_inode_header dir; ++ struct squashfs_ldir_inode_header ldir; ++ struct squashfs_ipc_inode_header ipc; ++}; ++ ++struct squashfs_dir_entry { ++ unsigned int offset:13; ++ unsigned int type:3; ++ unsigned int size:8; ++ int inode_number:16; ++ char name[0]; ++} __attribute__ ((packed)); ++ ++struct squashfs_dir_header { ++ unsigned int count:8; ++ unsigned int start_block; ++ unsigned int inode_number; ++} __attribute__ ((packed)); ++ ++struct squashfs_fragment_entry { ++ long long start_block; ++ unsigned int size; ++ unsigned int pending; ++} __attribute__ ((packed)); ++ ++extern int 
squashfs_uncompress_block(void *d, int dstlen, void *s, int srclen); ++extern int squashfs_uncompress_init(void); ++extern int squashfs_uncompress_exit(void); ++ ++/* ++ * macros to convert each packed bitfield structure from little endian to big ++ * endian and vice versa. These are needed when creating or using a filesystem ++ * on a machine with different byte ordering to the target architecture. ++ * ++ */ ++ ++#define SQUASHFS_SWAP_START \ ++ int bits;\ ++ int b_pos;\ ++ unsigned long long val;\ ++ unsigned char *s;\ ++ unsigned char *d; ++ ++#define SQUASHFS_SWAP_SUPER_BLOCK(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_MEMSET(s, d, sizeof(struct squashfs_super_block));\ ++ SQUASHFS_SWAP((s)->s_magic, d, 0, 32);\ ++ SQUASHFS_SWAP((s)->inodes, d, 32, 32);\ ++ SQUASHFS_SWAP((s)->bytes_used_2, d, 64, 32);\ ++ SQUASHFS_SWAP((s)->uid_start_2, d, 96, 32);\ ++ SQUASHFS_SWAP((s)->guid_start_2, d, 128, 32);\ ++ SQUASHFS_SWAP((s)->inode_table_start_2, d, 160, 32);\ ++ SQUASHFS_SWAP((s)->directory_table_start_2, d, 192, 32);\ ++ SQUASHFS_SWAP((s)->s_major, d, 224, 16);\ ++ SQUASHFS_SWAP((s)->s_minor, d, 240, 16);\ ++ SQUASHFS_SWAP((s)->block_size_1, d, 256, 16);\ ++ SQUASHFS_SWAP((s)->block_log, d, 272, 16);\ ++ SQUASHFS_SWAP((s)->flags, d, 288, 8);\ ++ SQUASHFS_SWAP((s)->no_uids, d, 296, 8);\ ++ SQUASHFS_SWAP((s)->no_guids, d, 304, 8);\ ++ SQUASHFS_SWAP((s)->mkfs_time, d, 312, 32);\ ++ SQUASHFS_SWAP((s)->root_inode, d, 344, 64);\ ++ SQUASHFS_SWAP((s)->block_size, d, 408, 32);\ ++ SQUASHFS_SWAP((s)->fragments, d, 440, 32);\ ++ SQUASHFS_SWAP((s)->fragment_table_start_2, d, 472, 32);\ ++ SQUASHFS_SWAP((s)->bytes_used, d, 504, 64);\ ++ SQUASHFS_SWAP((s)->uid_start, d, 568, 64);\ ++ SQUASHFS_SWAP((s)->guid_start, d, 632, 64);\ ++ SQUASHFS_SWAP((s)->inode_table_start, d, 696, 64);\ ++ SQUASHFS_SWAP((s)->directory_table_start, d, 760, 64);\ ++ SQUASHFS_SWAP((s)->fragment_table_start, d, 824, 64);\ ++ SQUASHFS_SWAP((s)->lookup_table_start, d, 888, 64);\ ++} ++ ++#define SQUASHFS_SWAP_BASE_INODE_CORE(s, d, n)\ ++ SQUASHFS_MEMSET(s, d, n);\ ++ SQUASHFS_SWAP((s)->inode_type, d, 0, 4);\ ++ SQUASHFS_SWAP((s)->mode, d, 4, 12);\ ++ SQUASHFS_SWAP((s)->uid, d, 16, 8);\ ++ SQUASHFS_SWAP((s)->guid, d, 24, 8);\ ++ SQUASHFS_SWAP((s)->mtime, d, 32, 32);\ ++ SQUASHFS_SWAP((s)->inode_number, d, 64, 32); ++ ++#define SQUASHFS_SWAP_BASE_INODE_HEADER(s, d, n) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE(s, d, n)\ ++} ++ ++#define SQUASHFS_SWAP_IPC_INODE_HEADER(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE(s, d, \ ++ sizeof(struct squashfs_ipc_inode_header))\ ++ SQUASHFS_SWAP((s)->nlink, d, 96, 32);\ ++} ++ ++#define SQUASHFS_SWAP_DEV_INODE_HEADER(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE(s, d, \ ++ sizeof(struct squashfs_dev_inode_header)); \ ++ SQUASHFS_SWAP((s)->nlink, d, 96, 32);\ ++ SQUASHFS_SWAP((s)->rdev, d, 128, 16);\ ++} ++ ++#define SQUASHFS_SWAP_SYMLINK_INODE_HEADER(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE(s, d, \ ++ sizeof(struct squashfs_symlink_inode_header));\ ++ SQUASHFS_SWAP((s)->nlink, d, 96, 32);\ ++ SQUASHFS_SWAP((s)->symlink_size, d, 128, 16);\ ++} ++ ++#define SQUASHFS_SWAP_REG_INODE_HEADER(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE(s, d, \ ++ sizeof(struct squashfs_reg_inode_header));\ ++ SQUASHFS_SWAP((s)->start_block, d, 96, 64);\ ++ SQUASHFS_SWAP((s)->fragment, d, 160, 32);\ ++ SQUASHFS_SWAP((s)->offset, d, 192, 32);\ ++ SQUASHFS_SWAP((s)->file_size, d, 224, 32);\ ++} ++ ++#define 
SQUASHFS_SWAP_LREG_INODE_HEADER(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE(s, d, \ ++ sizeof(struct squashfs_lreg_inode_header));\ ++ SQUASHFS_SWAP((s)->nlink, d, 96, 32);\ ++ SQUASHFS_SWAP((s)->start_block, d, 128, 64);\ ++ SQUASHFS_SWAP((s)->fragment, d, 192, 32);\ ++ SQUASHFS_SWAP((s)->offset, d, 224, 32);\ ++ SQUASHFS_SWAP((s)->file_size, d, 256, 64);\ ++} ++ ++#define SQUASHFS_SWAP_DIR_INODE_HEADER(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE(s, d, \ ++ sizeof(struct squashfs_dir_inode_header));\ ++ SQUASHFS_SWAP((s)->nlink, d, 96, 32);\ ++ SQUASHFS_SWAP((s)->file_size, d, 128, 19);\ ++ SQUASHFS_SWAP((s)->offset, d, 147, 13);\ ++ SQUASHFS_SWAP((s)->start_block, d, 160, 32);\ ++ SQUASHFS_SWAP((s)->parent_inode, d, 192, 32);\ ++} ++ ++#define SQUASHFS_SWAP_LDIR_INODE_HEADER(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE(s, d, \ ++ sizeof(struct squashfs_ldir_inode_header));\ ++ SQUASHFS_SWAP((s)->nlink, d, 96, 32);\ ++ SQUASHFS_SWAP((s)->file_size, d, 128, 27);\ ++ SQUASHFS_SWAP((s)->offset, d, 155, 13);\ ++ SQUASHFS_SWAP((s)->start_block, d, 168, 32);\ ++ SQUASHFS_SWAP((s)->i_count, d, 200, 16);\ ++ SQUASHFS_SWAP((s)->parent_inode, d, 216, 32);\ ++} ++ ++#define SQUASHFS_SWAP_DIR_INDEX(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_MEMSET(s, d, sizeof(struct squashfs_dir_index));\ ++ SQUASHFS_SWAP((s)->index, d, 0, 32);\ ++ SQUASHFS_SWAP((s)->start_block, d, 32, 32);\ ++ SQUASHFS_SWAP((s)->size, d, 64, 8);\ ++} ++ ++#define SQUASHFS_SWAP_DIR_HEADER(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_MEMSET(s, d, sizeof(struct squashfs_dir_header));\ ++ SQUASHFS_SWAP((s)->count, d, 0, 8);\ ++ SQUASHFS_SWAP((s)->start_block, d, 8, 32);\ ++ SQUASHFS_SWAP((s)->inode_number, d, 40, 32);\ ++} ++ ++#define SQUASHFS_SWAP_DIR_ENTRY(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_MEMSET(s, d, sizeof(struct squashfs_dir_entry));\ ++ SQUASHFS_SWAP((s)->offset, d, 0, 13);\ ++ SQUASHFS_SWAP((s)->type, d, 13, 3);\ ++ SQUASHFS_SWAP((s)->size, d, 16, 8);\ ++ SQUASHFS_SWAP((s)->inode_number, d, 24, 16);\ ++} ++ ++#define SQUASHFS_SWAP_FRAGMENT_ENTRY(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_MEMSET(s, d, sizeof(struct squashfs_fragment_entry));\ ++ SQUASHFS_SWAP((s)->start_block, d, 0, 64);\ ++ SQUASHFS_SWAP((s)->size, d, 64, 32);\ ++} ++ ++#define SQUASHFS_SWAP_INODE_T(s, d) SQUASHFS_SWAP_LONG_LONGS(s, d, 1) ++ ++#define SQUASHFS_SWAP_SHORTS(s, d, n) {\ ++ int entry;\ ++ int bit_position;\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_MEMSET(s, d, n * 2);\ ++ for(entry = 0, bit_position = 0; entry < n; entry++, bit_position += \ ++ 16)\ ++ SQUASHFS_SWAP(s[entry], d, bit_position, 16);\ ++} ++ ++#define SQUASHFS_SWAP_INTS(s, d, n) {\ ++ int entry;\ ++ int bit_position;\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_MEMSET(s, d, n * 4);\ ++ for(entry = 0, bit_position = 0; entry < n; entry++, bit_position += \ ++ 32)\ ++ SQUASHFS_SWAP(s[entry], d, bit_position, 32);\ ++} ++ ++#define SQUASHFS_SWAP_LONG_LONGS(s, d, n) {\ ++ int entry;\ ++ int bit_position;\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_MEMSET(s, d, n * 8);\ ++ for(entry = 0, bit_position = 0; entry < n; entry++, bit_position += \ ++ 64)\ ++ SQUASHFS_SWAP(s[entry], d, bit_position, 64);\ ++} ++ ++#define SQUASHFS_SWAP_DATA(s, d, n, bits) {\ ++ int entry;\ ++ int bit_position;\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_MEMSET(s, d, n * bits / 8);\ ++ for(entry = 0, bit_position = 0; entry < n; entry++, bit_position += \ ++ bits)\ ++ SQUASHFS_SWAP(s[entry], d, bit_position, bits);\ ++} ++ ++#define 
SQUASHFS_SWAP_FRAGMENT_INDEXES(s, d, n) SQUASHFS_SWAP_LONG_LONGS(s, d, n) ++#define SQUASHFS_SWAP_LOOKUP_BLOCKS(s, d, n) SQUASHFS_SWAP_LONG_LONGS(s, d, n) ++ ++#ifdef CONFIG_SQUASHFS_1_0_COMPATIBILITY ++ ++struct squashfs_base_inode_header_1 { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:4; /* index into uid table */ ++ unsigned int guid:4; /* index into guid table */ ++} __attribute__ ((packed)); ++ ++struct squashfs_ipc_inode_header_1 { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:4; /* index into uid table */ ++ unsigned int guid:4; /* index into guid table */ ++ unsigned int type:4; ++ unsigned int offset:4; ++} __attribute__ ((packed)); ++ ++struct squashfs_dev_inode_header_1 { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:4; /* index into uid table */ ++ unsigned int guid:4; /* index into guid table */ ++ unsigned short rdev; ++} __attribute__ ((packed)); ++ ++struct squashfs_symlink_inode_header_1 { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:4; /* index into uid table */ ++ unsigned int guid:4; /* index into guid table */ ++ unsigned short symlink_size; ++ char symlink[0]; ++} __attribute__ ((packed)); ++ ++struct squashfs_reg_inode_header_1 { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:4; /* index into uid table */ ++ unsigned int guid:4; /* index into guid table */ ++ unsigned int mtime; ++ unsigned int start_block; ++ unsigned int file_size:32; ++ unsigned short block_list[0]; ++} __attribute__ ((packed)); ++ ++struct squashfs_dir_inode_header_1 { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:4; /* index into uid table */ ++ unsigned int guid:4; /* index into guid table */ ++ unsigned int file_size:19; ++ unsigned int offset:13; ++ unsigned int mtime; ++ unsigned int start_block:24; ++} __attribute__ ((packed)); ++ ++#define SQUASHFS_SWAP_BASE_INODE_CORE_1(s, d, n) \ ++ SQUASHFS_MEMSET(s, d, n);\ ++ SQUASHFS_SWAP((s)->inode_type, d, 0, 4);\ ++ SQUASHFS_SWAP((s)->mode, d, 4, 12);\ ++ SQUASHFS_SWAP((s)->uid, d, 16, 4);\ ++ SQUASHFS_SWAP((s)->guid, d, 20, 4); ++ ++#define SQUASHFS_SWAP_BASE_INODE_HEADER_1(s, d, n) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE_1(s, d, n)\ ++} ++ ++#define SQUASHFS_SWAP_IPC_INODE_HEADER_1(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE_1(s, d, \ ++ sizeof(struct squashfs_ipc_inode_header_1));\ ++ SQUASHFS_SWAP((s)->type, d, 24, 4);\ ++ SQUASHFS_SWAP((s)->offset, d, 28, 4);\ ++} ++ ++#define SQUASHFS_SWAP_DEV_INODE_HEADER_1(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE_1(s, d, \ ++ sizeof(struct squashfs_dev_inode_header_1));\ ++ SQUASHFS_SWAP((s)->rdev, d, 24, 16);\ ++} ++ ++#define SQUASHFS_SWAP_SYMLINK_INODE_HEADER_1(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE_1(s, d, \ ++ sizeof(struct squashfs_symlink_inode_header_1));\ ++ SQUASHFS_SWAP((s)->symlink_size, d, 24, 16);\ ++} ++ ++#define SQUASHFS_SWAP_REG_INODE_HEADER_1(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE_1(s, d, \ ++ sizeof(struct squashfs_reg_inode_header_1));\ ++ SQUASHFS_SWAP((s)->mtime, d, 24, 32);\ ++ SQUASHFS_SWAP((s)->start_block, d, 56, 32);\ ++ SQUASHFS_SWAP((s)->file_size, d, 88, 32);\ ++} ++ ++#define SQUASHFS_SWAP_DIR_INODE_HEADER_1(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE_1(s, d, \ 
++ sizeof(struct squashfs_dir_inode_header_1));\ ++ SQUASHFS_SWAP((s)->file_size, d, 24, 19);\ ++ SQUASHFS_SWAP((s)->offset, d, 43, 13);\ ++ SQUASHFS_SWAP((s)->mtime, d, 56, 32);\ ++ SQUASHFS_SWAP((s)->start_block, d, 88, 24);\ ++} ++ ++#endif ++ ++#ifdef CONFIG_SQUASHFS_2_0_COMPATIBILITY ++ ++struct squashfs_dir_index_2 { ++ unsigned int index:27; ++ unsigned int start_block:29; ++ unsigned char size; ++ unsigned char name[0]; ++} __attribute__ ((packed)); ++ ++struct squashfs_base_inode_header_2 { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:8; /* index into uid table */ ++ unsigned int guid:8; /* index into guid table */ ++} __attribute__ ((packed)); ++ ++struct squashfs_ipc_inode_header_2 { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:8; /* index into uid table */ ++ unsigned int guid:8; /* index into guid table */ ++} __attribute__ ((packed)); ++ ++struct squashfs_dev_inode_header_2 { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:8; /* index into uid table */ ++ unsigned int guid:8; /* index into guid table */ ++ unsigned short rdev; ++} __attribute__ ((packed)); ++ ++struct squashfs_symlink_inode_header_2 { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:8; /* index into uid table */ ++ unsigned int guid:8; /* index into guid table */ ++ unsigned short symlink_size; ++ char symlink[0]; ++} __attribute__ ((packed)); ++ ++struct squashfs_reg_inode_header_2 { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:8; /* index into uid table */ ++ unsigned int guid:8; /* index into guid table */ ++ unsigned int mtime; ++ unsigned int start_block; ++ unsigned int fragment; ++ unsigned int offset; ++ unsigned int file_size:32; ++ unsigned short block_list[0]; ++} __attribute__ ((packed)); ++ ++struct squashfs_dir_inode_header_2 { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:8; /* index into uid table */ ++ unsigned int guid:8; /* index into guid table */ ++ unsigned int file_size:19; ++ unsigned int offset:13; ++ unsigned int mtime; ++ unsigned int start_block:24; ++} __attribute__ ((packed)); ++ ++struct squashfs_ldir_inode_header_2 { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:8; /* index into uid table */ ++ unsigned int guid:8; /* index into guid table */ ++ unsigned int file_size:27; ++ unsigned int offset:13; ++ unsigned int mtime; ++ unsigned int start_block:24; ++ unsigned int i_count:16; ++ struct squashfs_dir_index_2 index[0]; ++} __attribute__ ((packed)); ++ ++union squashfs_inode_header_2 { ++ struct squashfs_base_inode_header_2 base; ++ struct squashfs_dev_inode_header_2 dev; ++ struct squashfs_symlink_inode_header_2 symlink; ++ struct squashfs_reg_inode_header_2 reg; ++ struct squashfs_dir_inode_header_2 dir; ++ struct squashfs_ldir_inode_header_2 ldir; ++ struct squashfs_ipc_inode_header_2 ipc; ++}; ++ ++struct squashfs_dir_header_2 { ++ unsigned int count:8; ++ unsigned int start_block:24; ++} __attribute__ ((packed)); ++ ++struct squashfs_dir_entry_2 { ++ unsigned int offset:13; ++ unsigned int type:3; ++ unsigned int size:8; ++ char name[0]; ++} __attribute__ ((packed)); ++ ++struct squashfs_fragment_entry_2 { ++ unsigned int start_block; ++ unsigned int size; ++} __attribute__ ((packed)); ++ ++#define SQUASHFS_SWAP_BASE_INODE_CORE_2(s, d, n)\ ++ 
SQUASHFS_MEMSET(s, d, n);\ ++ SQUASHFS_SWAP((s)->inode_type, d, 0, 4);\ ++ SQUASHFS_SWAP((s)->mode, d, 4, 12);\ ++ SQUASHFS_SWAP((s)->uid, d, 16, 8);\ ++ SQUASHFS_SWAP((s)->guid, d, 24, 8);\ ++ ++#define SQUASHFS_SWAP_BASE_INODE_HEADER_2(s, d, n) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE_2(s, d, n)\ ++} ++ ++#define SQUASHFS_SWAP_IPC_INODE_HEADER_2(s, d) \ ++ SQUASHFS_SWAP_BASE_INODE_HEADER_2(s, d, sizeof(struct squashfs_ipc_inode_header_2)) ++ ++#define SQUASHFS_SWAP_DEV_INODE_HEADER_2(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE_2(s, d, \ ++ sizeof(struct squashfs_dev_inode_header_2)); \ ++ SQUASHFS_SWAP((s)->rdev, d, 32, 16);\ ++} ++ ++#define SQUASHFS_SWAP_SYMLINK_INODE_HEADER_2(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE_2(s, d, \ ++ sizeof(struct squashfs_symlink_inode_header_2));\ ++ SQUASHFS_SWAP((s)->symlink_size, d, 32, 16);\ ++} ++ ++#define SQUASHFS_SWAP_REG_INODE_HEADER_2(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE_2(s, d, \ ++ sizeof(struct squashfs_reg_inode_header_2));\ ++ SQUASHFS_SWAP((s)->mtime, d, 32, 32);\ ++ SQUASHFS_SWAP((s)->start_block, d, 64, 32);\ ++ SQUASHFS_SWAP((s)->fragment, d, 96, 32);\ ++ SQUASHFS_SWAP((s)->offset, d, 128, 32);\ ++ SQUASHFS_SWAP((s)->file_size, d, 160, 32);\ ++} ++ ++#define SQUASHFS_SWAP_DIR_INODE_HEADER_2(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE_2(s, d, \ ++ sizeof(struct squashfs_dir_inode_header_2));\ ++ SQUASHFS_SWAP((s)->file_size, d, 32, 19);\ ++ SQUASHFS_SWAP((s)->offset, d, 51, 13);\ ++ SQUASHFS_SWAP((s)->mtime, d, 64, 32);\ ++ SQUASHFS_SWAP((s)->start_block, d, 96, 24);\ ++} ++ ++#define SQUASHFS_SWAP_LDIR_INODE_HEADER_2(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_SWAP_BASE_INODE_CORE_2(s, d, \ ++ sizeof(struct squashfs_ldir_inode_header_2));\ ++ SQUASHFS_SWAP((s)->file_size, d, 32, 27);\ ++ SQUASHFS_SWAP((s)->offset, d, 59, 13);\ ++ SQUASHFS_SWAP((s)->mtime, d, 72, 32);\ ++ SQUASHFS_SWAP((s)->start_block, d, 104, 24);\ ++ SQUASHFS_SWAP((s)->i_count, d, 128, 16);\ ++} ++ ++#define SQUASHFS_SWAP_DIR_INDEX_2(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_MEMSET(s, d, sizeof(struct squashfs_dir_index_2));\ ++ SQUASHFS_SWAP((s)->index, d, 0, 27);\ ++ SQUASHFS_SWAP((s)->start_block, d, 27, 29);\ ++ SQUASHFS_SWAP((s)->size, d, 56, 8);\ ++} ++#define SQUASHFS_SWAP_DIR_HEADER_2(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_MEMSET(s, d, sizeof(struct squashfs_dir_header_2));\ ++ SQUASHFS_SWAP((s)->count, d, 0, 8);\ ++ SQUASHFS_SWAP((s)->start_block, d, 8, 24);\ ++} ++ ++#define SQUASHFS_SWAP_DIR_ENTRY_2(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_MEMSET(s, d, sizeof(struct squashfs_dir_entry_2));\ ++ SQUASHFS_SWAP((s)->offset, d, 0, 13);\ ++ SQUASHFS_SWAP((s)->type, d, 13, 3);\ ++ SQUASHFS_SWAP((s)->size, d, 16, 8);\ ++} ++ ++#define SQUASHFS_SWAP_FRAGMENT_ENTRY_2(s, d) {\ ++ SQUASHFS_SWAP_START\ ++ SQUASHFS_MEMSET(s, d, sizeof(struct squashfs_fragment_entry_2));\ ++ SQUASHFS_SWAP((s)->start_block, d, 0, 32);\ ++ SQUASHFS_SWAP((s)->size, d, 32, 32);\ ++} ++ ++#define SQUASHFS_SWAP_FRAGMENT_INDEXES_2(s, d, n) SQUASHFS_SWAP_INTS(s, d, n) ++ ++/* fragment and fragment table defines */ ++#define SQUASHFS_FRAGMENT_BYTES_2(A) (A * sizeof(struct squashfs_fragment_entry_2)) ++ ++#define SQUASHFS_FRAGMENT_INDEX_2(A) (SQUASHFS_FRAGMENT_BYTES_2(A) / \ ++ SQUASHFS_METADATA_SIZE) ++ ++#define SQUASHFS_FRAGMENT_INDEX_OFFSET_2(A) (SQUASHFS_FRAGMENT_BYTES_2(A) % \ ++ SQUASHFS_METADATA_SIZE) ++ ++#define SQUASHFS_FRAGMENT_INDEXES_2(A) 
((SQUASHFS_FRAGMENT_BYTES_2(A) + \ ++ SQUASHFS_METADATA_SIZE - 1) / \ ++ SQUASHFS_METADATA_SIZE) ++ ++#define SQUASHFS_FRAGMENT_INDEX_BYTES_2(A) (SQUASHFS_FRAGMENT_INDEXES_2(A) *\ ++ sizeof(int)) ++ ++#endif ++ ++#ifdef __KERNEL__ ++ ++/* ++ * macros used to swap each structure entry, taking into account ++ * bitfields and different bitfield placing conventions on differing ++ * architectures ++ */ ++ ++#include <asm/byteorder.h> ++ ++#ifdef __BIG_ENDIAN ++ /* convert from little endian to big endian */ ++#define SQUASHFS_SWAP(value, p, pos, tbits) _SQUASHFS_SWAP(value, p, pos, \ ++ tbits, b_pos) ++#else ++ /* convert from big endian to little endian */ ++#define SQUASHFS_SWAP(value, p, pos, tbits) _SQUASHFS_SWAP(value, p, pos, \ ++ tbits, 64 - tbits - b_pos) ++#endif ++ ++#define _SQUASHFS_SWAP(value, p, pos, tbits, SHIFT) {\ ++ b_pos = pos % 8;\ ++ val = 0;\ ++ s = (unsigned char *)p + (pos / 8);\ ++ d = ((unsigned char *) &val) + 7;\ ++ for(bits = 0; bits < (tbits + b_pos); bits += 8) \ ++ *d-- = *s++;\ ++ value = (val >> (SHIFT))/* & ((1 << tbits) - 1)*/;\ ++} ++ ++#define SQUASHFS_MEMSET(s, d, n) memset(s, 0, n); ++ ++#endif ++#endif +diff --git a/include/linux/squashfs_fs_i.h b/include/linux/squashfs_fs_i.h +new file mode 100644 +index 0000000..798891a +--- /dev/null ++++ b/include/linux/squashfs_fs_i.h +@@ -0,0 +1,45 @@ ++#ifndef SQUASHFS_FS_I ++#define SQUASHFS_FS_I ++/* ++ * Squashfs ++ * ++ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007 ++ * Phillip Lougher <phillip@lougher.org.uk> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2, ++ * or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * squashfs_fs_i.h ++ */ ++ ++struct squashfs_inode_info { ++ long long start_block; ++ unsigned int offset; ++ union { ++ struct { ++ long long fragment_start_block; ++ unsigned int fragment_size; ++ unsigned int fragment_offset; ++ long long block_list_start; ++ } s1; ++ struct { ++ long long directory_index_start; ++ unsigned int directory_index_offset; ++ unsigned int directory_index_count; ++ unsigned int parent_inode; ++ } s2; ++ } u; ++ struct inode vfs_inode; ++}; ++#endif +diff --git a/include/linux/squashfs_fs_sb.h b/include/linux/squashfs_fs_sb.h +new file mode 100644 +index 0000000..8f3bf99 +--- /dev/null ++++ b/include/linux/squashfs_fs_sb.h +@@ -0,0 +1,74 @@ ++#ifndef SQUASHFS_FS_SB ++#define SQUASHFS_FS_SB ++/* ++ * Squashfs ++ * ++ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007 ++ * Phillip Lougher <phillip@lougher.org.uk> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2, ++ * or (at your option) any later version. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * squashfs_fs_sb.h ++ */ ++ ++#include <linux/squashfs_fs.h> ++ ++struct squashfs_cache { ++ long long block; ++ int length; ++ long long next_index; ++ char *data; ++}; ++ ++struct squashfs_fragment_cache { ++ long long block; ++ int length; ++ unsigned int locked; ++ char *data; ++}; ++ ++struct squashfs_sb_info { ++ struct squashfs_super_block sblk; ++ int devblksize; ++ int devblksize_log2; ++ int swap; ++ struct squashfs_cache *block_cache; ++ struct squashfs_fragment_cache *fragment; ++ int next_cache; ++ int next_fragment; ++ int next_meta_index; ++ unsigned int *uid; ++ unsigned int *guid; ++ long long *fragment_index; ++ unsigned int *fragment_index_2; ++ char *read_page; ++ struct mutex read_data_mutex; ++ struct mutex read_page_mutex; ++ struct mutex block_cache_mutex; ++ struct mutex fragment_mutex; ++ struct mutex meta_index_mutex; ++ wait_queue_head_t waitq; ++ wait_queue_head_t fragment_wait_queue; ++ struct meta_index *meta_index; ++ z_stream stream; ++ long long *inode_lookup_table; ++ int (*read_inode)(struct inode *i, squashfs_inode_t \ ++ inode); ++ long long (*read_blocklist)(struct inode *inode, int \ ++ index, int readahead_blks, char *block_list, \ ++ unsigned short **block_p, unsigned int *bsize); ++ int (*read_fragment_index_table)(struct super_block *s); ++}; ++#endif +diff --git a/init/Kconfig b/init/Kconfig +index b170aa1..bcfc3b4 100644 +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -244,23 +244,21 @@ config AUDITSYSCALL + ensure that INOTIFY is configured. + + config IKCONFIG +- tristate "Kernel .config support" ++ tristate "Kernel .miniconfig support" + ---help--- +- This option enables the complete Linux kernel ".config" file ++ This option enables the mini Linux kernel ".miniconfig" file + contents to be saved in the kernel. It provides documentation + of which kernel options are used in a running kernel or in an +- on-disk kernel. This information can be extracted from the kernel +- image file with the script scripts/extract-ikconfig and used as +- input to rebuild the current kernel or to build another kernel. +- It can also be extracted from a running kernel by reading +- /proc/config.gz if enabled (below). ++ on-disk kernel. ++ It can be extracted from a running kernel by reading ++ /proc/miniconfig.gz if enabled (below). + + config IKCONFIG_PROC +- bool "Enable access to .config through /proc/config.gz" ++ bool "Enable access to .miniconfig through /proc/miniconfig.gz" + depends on IKCONFIG && PROC_FS + ---help--- + This option enables access to the kernel configuration file +- through /proc/config.gz. ++ through /proc/miniconfig.gz. 
+ + config CPUSETS + bool "Cpuset support" +diff --git a/init/LzmaDecode.c b/init/LzmaDecode.c +new file mode 100644 +index 0000000..21bf40b +--- /dev/null ++++ b/init/LzmaDecode.c +@@ -0,0 +1,588 @@ ++/* ++ LzmaDecode.c ++ LZMA Decoder (optimized for Speed version) ++ ++ LZMA SDK 4.22 Copyright (c) 1999-2005 Igor Pavlov (2005-06-10) ++ http://www.7-zip.org/ ++ ++ LZMA SDK is licensed under two licenses: ++ 1) GNU Lesser General Public License (GNU LGPL) ++ 2) Common Public License (CPL) ++ It means that you can select one of these two licenses and ++ follow rules of that license. ++ ++ SPECIAL EXCEPTION: ++ Igor Pavlov, as the author of this Code, expressly permits you to ++ statically or dynamically link your Code (or bind by name) to the ++ interfaces of this file without subjecting your linked Code to the ++ terms of the CPL or GNU LGPL. Any modifications or additions ++ to this file, however, are subject to the LGPL or CPL terms. ++*/ ++ ++#include "LzmaDecode.h" ++ ++#ifndef Byte ++#define Byte unsigned char ++#endif ++ ++#define kNumTopBits 24 ++#define kTopValue ((UInt32)1 << kNumTopBits) ++ ++#define kNumBitModelTotalBits 11 ++#define kBitModelTotal (1 << kNumBitModelTotalBits) ++#define kNumMoveBits 5 ++ ++#define RC_READ_BYTE (*Buffer++) ++ ++#define RC_INIT2 Code = 0; Range = 0xFFFFFFFF; \ ++ { int i; for(i = 0; i < 5; i++) { RC_TEST; Code = (Code << 8) | RC_READ_BYTE; }} ++ ++#ifdef _LZMA_IN_CB ++ ++#define RC_TEST { if (Buffer == BufferLim) \ ++ { SizeT size; int result = InCallback->Read(InCallback, &Buffer, &size); if (result != LZMA_RESULT_OK) return result; \ ++ BufferLim = Buffer + size; if (size == 0) return LZMA_RESULT_DATA_ERROR; }} ++ ++#define RC_INIT Buffer = BufferLim = 0; RC_INIT2 ++ ++#else ++ ++#define RC_TEST { if (Buffer == BufferLim) return LZMA_RESULT_DATA_ERROR; } ++ ++#define RC_INIT(buffer, bufferSize) Buffer = buffer; BufferLim = buffer + bufferSize; RC_INIT2 ++ ++#endif ++ ++#define RC_NORMALIZE if (Range < kTopValue) { RC_TEST; Range <<= 8; Code = (Code << 8) | RC_READ_BYTE; } ++ ++#define IfBit0(p) RC_NORMALIZE; bound = (Range >> kNumBitModelTotalBits) * *(p); if (Code < bound) ++#define UpdateBit0(p) Range = bound; *(p) += (kBitModelTotal - *(p)) >> kNumMoveBits; ++#define UpdateBit1(p) Range -= bound; Code -= bound; *(p) -= (*(p)) >> kNumMoveBits; ++ ++#define RC_GET_BIT2(p, mi, A0, A1) IfBit0(p) \ ++ { UpdateBit0(p); mi <<= 1; A0; } else \ ++ { UpdateBit1(p); mi = (mi + mi) + 1; A1; } ++ ++#define RC_GET_BIT(p, mi) RC_GET_BIT2(p, mi, ; , ;) ++ ++#define RangeDecoderBitTreeDecode(probs, numLevels, res) \ ++ { int i = numLevels; res = 1; \ ++ do { CProb *p = probs + res; RC_GET_BIT(p, res) } while(--i != 0); \ ++ res -= (1 << numLevels); } ++ ++ ++#define kNumPosBitsMax 4 ++#define kNumPosStatesMax (1 << kNumPosBitsMax) ++ ++#define kLenNumLowBits 3 ++#define kLenNumLowSymbols (1 << kLenNumLowBits) ++#define kLenNumMidBits 3 ++#define kLenNumMidSymbols (1 << kLenNumMidBits) ++#define kLenNumHighBits 8 ++#define kLenNumHighSymbols (1 << kLenNumHighBits) ++ ++#define LenChoice 0 ++#define LenChoice2 (LenChoice + 1) ++#define LenLow (LenChoice2 + 1) ++#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits)) ++#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits)) ++#define kNumLenProbs (LenHigh + kLenNumHighSymbols) ++ ++ ++#define kNumStates 12 ++#define kNumLitStates 7 ++ ++#define kStartPosModelIndex 4 ++#define kEndPosModelIndex 14 ++#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) ++ ++#define kNumPosSlotBits 6 ++#define 
kNumLenToPosStates 4 ++ ++#define kNumAlignBits 4 ++#define kAlignTableSize (1 << kNumAlignBits) ++ ++#define kMatchMinLen 2 ++ ++#define IsMatch 0 ++#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax)) ++#define IsRepG0 (IsRep + kNumStates) ++#define IsRepG1 (IsRepG0 + kNumStates) ++#define IsRepG2 (IsRepG1 + kNumStates) ++#define IsRep0Long (IsRepG2 + kNumStates) ++#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax)) ++#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) ++#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex) ++#define LenCoder (Align + kAlignTableSize) ++#define RepLenCoder (LenCoder + kNumLenProbs) ++#define Literal (RepLenCoder + kNumLenProbs) ++ ++#if Literal != LZMA_BASE_SIZE ++StopCompilingDueBUG ++#endif ++ ++int LzmaDecodeProperties(CLzmaProperties *propsRes, const unsigned char *propsData, int size) ++{ ++ unsigned char prop0; ++ if (size < LZMA_PROPERTIES_SIZE) ++ return LZMA_RESULT_DATA_ERROR; ++ prop0 = propsData[0]; ++ if (prop0 >= (9 * 5 * 5)) ++ return LZMA_RESULT_DATA_ERROR; ++ { ++ for (propsRes->pb = 0; prop0 >= (9 * 5); propsRes->pb++, prop0 -= (9 * 5)); ++ for (propsRes->lp = 0; prop0 >= 9; propsRes->lp++, prop0 -= 9); ++ propsRes->lc = prop0; ++ /* ++ unsigned char remainder = (unsigned char)(prop0 / 9); ++ propsRes->lc = prop0 % 9; ++ propsRes->pb = remainder / 5; ++ propsRes->lp = remainder % 5; ++ */ ++ } ++ ++ #ifdef _LZMA_OUT_READ ++ { ++ int i; ++ propsRes->DictionarySize = 0; ++ for (i = 0; i < 4; i++) ++ propsRes->DictionarySize += (UInt32)(propsData[1 + i]) << (i * 8); ++ if (propsRes->DictionarySize == 0) ++ propsRes->DictionarySize = 1; ++ } ++ #endif ++ return LZMA_RESULT_OK; ++} ++ ++#define kLzmaStreamWasFinishedId (-1) ++ ++int LzmaDecode(CLzmaDecoderState *vs, ++ #ifdef _LZMA_IN_CB ++ ILzmaInCallback *InCallback, ++ #else ++ const unsigned char *inStream, SizeT inSize, SizeT *inSizeProcessed, ++ #endif ++ unsigned char *outStream, SizeT outSize, SizeT *outSizeProcessed) ++{ ++ CProb *p = vs->Probs; ++ SizeT nowPos = 0; ++ Byte previousByte = 0; ++ UInt32 posStateMask = (1 << (vs->Properties.pb)) - 1; ++ UInt32 literalPosMask = (1 << (vs->Properties.lp)) - 1; ++ int lc = vs->Properties.lc; ++ ++ #ifdef _LZMA_OUT_READ ++ ++ UInt32 Range = vs->Range; ++ UInt32 Code = vs->Code; ++ #ifdef _LZMA_IN_CB ++ const Byte *Buffer = vs->Buffer; ++ const Byte *BufferLim = vs->BufferLim; ++ #else ++ const Byte *Buffer = inStream; ++ const Byte *BufferLim = inStream + inSize; ++ #endif ++ int state = vs->State; ++ UInt32 rep0 = vs->Reps[0], rep1 = vs->Reps[1], rep2 = vs->Reps[2], rep3 = vs->Reps[3]; ++ int len = vs->RemainLen; ++ UInt32 globalPos = vs->GlobalPos; ++ UInt32 distanceLimit = vs->DistanceLimit; ++ ++ Byte *dictionary = vs->Dictionary; ++ UInt32 dictionarySize = vs->Properties.DictionarySize; ++ UInt32 dictionaryPos = vs->DictionaryPos; ++ ++ Byte tempDictionary[4]; ++ ++ #ifndef _LZMA_IN_CB ++ *inSizeProcessed = 0; ++ #endif ++ *outSizeProcessed = 0; ++ if (len == kLzmaStreamWasFinishedId) ++ return LZMA_RESULT_OK; ++ ++ if (dictionarySize == 0) ++ { ++ dictionary = tempDictionary; ++ dictionarySize = 1; ++ tempDictionary[0] = vs->TempDictionary[0]; ++ } ++ ++ if (len == kLzmaNeedInitId) ++ { ++ { ++ UInt32 numProbs = Literal + ((UInt32)LZMA_LIT_SIZE << (lc + vs->Properties.lp)); ++ UInt32 i; ++ for (i = 0; i < numProbs; i++) ++ p[i] = kBitModelTotal >> 1; ++ rep0 = rep1 = rep2 = rep3 = 1; ++ state = 0; ++ globalPos = 0; ++ distanceLimit = 0; ++ dictionaryPos = 0; ++ dictionary[dictionarySize - 1] = 
0; ++ #ifdef _LZMA_IN_CB ++ RC_INIT; ++ #else ++ RC_INIT(inStream, inSize); ++ #endif ++ } ++ len = 0; ++ } ++ while(len != 0 && nowPos < outSize) ++ { ++ UInt32 pos = dictionaryPos - rep0; ++ if (pos >= dictionarySize) ++ pos += dictionarySize; ++ outStream[nowPos++] = dictionary[dictionaryPos] = dictionary[pos]; ++ if (++dictionaryPos == dictionarySize) ++ dictionaryPos = 0; ++ len--; ++ } ++ if (dictionaryPos == 0) ++ previousByte = dictionary[dictionarySize - 1]; ++ else ++ previousByte = dictionary[dictionaryPos - 1]; ++ ++ #else /* if !_LZMA_OUT_READ */ ++ ++ int state = 0; ++ UInt32 rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1; ++ int len = 0; ++ const Byte *Buffer; ++ const Byte *BufferLim; ++ UInt32 Range; ++ UInt32 Code; ++ ++ #ifndef _LZMA_IN_CB ++ *inSizeProcessed = 0; ++ #endif ++ *outSizeProcessed = 0; ++ ++ { ++ UInt32 i; ++ UInt32 numProbs = Literal + ((UInt32)LZMA_LIT_SIZE << (lc + vs->Properties.lp)); ++ for (i = 0; i < numProbs; i++) ++ p[i] = kBitModelTotal >> 1; ++ } ++ ++ #ifdef _LZMA_IN_CB ++ RC_INIT; ++ #else ++ RC_INIT(inStream, inSize); ++ #endif ++ ++ #endif /* _LZMA_OUT_READ */ ++ ++ while(nowPos < outSize) ++ { ++ CProb *prob; ++ UInt32 bound; ++ int posState = (int)( ++ (nowPos ++ #ifdef _LZMA_OUT_READ ++ + globalPos ++ #endif ++ ) ++ & posStateMask); ++ ++ prob = p + IsMatch + (state << kNumPosBitsMax) + posState; ++ IfBit0(prob) ++ { ++ int symbol = 1; ++ UpdateBit0(prob) ++ prob = p + Literal + (LZMA_LIT_SIZE * ++ ((( ++ (nowPos ++ #ifdef _LZMA_OUT_READ ++ + globalPos ++ #endif ++ ) ++ & literalPosMask) << lc) + (previousByte >> (8 - lc)))); ++ ++ if (state >= kNumLitStates) ++ { ++ int matchByte; ++ #ifdef _LZMA_OUT_READ ++ UInt32 pos = dictionaryPos - rep0; ++ if (pos >= dictionarySize) ++ pos += dictionarySize; ++ matchByte = dictionary[pos]; ++ #else ++ matchByte = outStream[nowPos - rep0]; ++ #endif ++ do ++ { ++ int bit; ++ CProb *probLit; ++ matchByte <<= 1; ++ bit = (matchByte & 0x100); ++ probLit = prob + 0x100 + bit + symbol; ++ RC_GET_BIT2(probLit, symbol, if (bit != 0) break, if (bit == 0) break) ++ } ++ while (symbol < 0x100); ++ } ++ while (symbol < 0x100) ++ { ++ CProb *probLit = prob + symbol; ++ RC_GET_BIT(probLit, symbol) ++ } ++ previousByte = (Byte)symbol; ++ ++ outStream[nowPos++] = previousByte; ++ #ifdef _LZMA_OUT_READ ++ if (distanceLimit < dictionarySize) ++ distanceLimit++; ++ ++ dictionary[dictionaryPos] = previousByte; ++ if (++dictionaryPos == dictionarySize) ++ dictionaryPos = 0; ++ #endif ++ if (state < 4) state = 0; ++ else if (state < 10) state -= 3; ++ else state -= 6; ++ } ++ else ++ { ++ UpdateBit1(prob); ++ prob = p + IsRep + state; ++ IfBit0(prob) ++ { ++ UpdateBit0(prob); ++ rep3 = rep2; ++ rep2 = rep1; ++ rep1 = rep0; ++ state = state < kNumLitStates ? 0 : 3; ++ prob = p + LenCoder; ++ } ++ else ++ { ++ UpdateBit1(prob); ++ prob = p + IsRepG0 + state; ++ IfBit0(prob) ++ { ++ UpdateBit0(prob); ++ prob = p + IsRep0Long + (state << kNumPosBitsMax) + posState; ++ IfBit0(prob) ++ { ++ #ifdef _LZMA_OUT_READ ++ UInt32 pos; ++ #endif ++ UpdateBit0(prob); ++ ++ #ifdef _LZMA_OUT_READ ++ if (distanceLimit == 0) ++ #else ++ if (nowPos == 0) ++ #endif ++ return LZMA_RESULT_DATA_ERROR; ++ ++ state = state < kNumLitStates ? 
9 : 11; ++ #ifdef _LZMA_OUT_READ ++ pos = dictionaryPos - rep0; ++ if (pos >= dictionarySize) ++ pos += dictionarySize; ++ previousByte = dictionary[pos]; ++ dictionary[dictionaryPos] = previousByte; ++ if (++dictionaryPos == dictionarySize) ++ dictionaryPos = 0; ++ #else ++ previousByte = outStream[nowPos - rep0]; ++ #endif ++ outStream[nowPos++] = previousByte; ++ #ifdef _LZMA_OUT_READ ++ if (distanceLimit < dictionarySize) ++ distanceLimit++; ++ #endif ++ ++ continue; ++ } ++ else ++ { ++ UpdateBit1(prob); ++ } ++ } ++ else ++ { ++ UInt32 distance; ++ UpdateBit1(prob); ++ prob = p + IsRepG1 + state; ++ IfBit0(prob) ++ { ++ UpdateBit0(prob); ++ distance = rep1; ++ } ++ else ++ { ++ UpdateBit1(prob); ++ prob = p + IsRepG2 + state; ++ IfBit0(prob) ++ { ++ UpdateBit0(prob); ++ distance = rep2; ++ } ++ else ++ { ++ UpdateBit1(prob); ++ distance = rep3; ++ rep3 = rep2; ++ } ++ rep2 = rep1; ++ } ++ rep1 = rep0; ++ rep0 = distance; ++ } ++ state = state < kNumLitStates ? 8 : 11; ++ prob = p + RepLenCoder; ++ } ++ { ++ int numBits, offset; ++ CProb *probLen = prob + LenChoice; ++ IfBit0(probLen) ++ { ++ UpdateBit0(probLen); ++ probLen = prob + LenLow + (posState << kLenNumLowBits); ++ offset = 0; ++ numBits = kLenNumLowBits; ++ } ++ else ++ { ++ UpdateBit1(probLen); ++ probLen = prob + LenChoice2; ++ IfBit0(probLen) ++ { ++ UpdateBit0(probLen); ++ probLen = prob + LenMid + (posState << kLenNumMidBits); ++ offset = kLenNumLowSymbols; ++ numBits = kLenNumMidBits; ++ } ++ else ++ { ++ UpdateBit1(probLen); ++ probLen = prob + LenHigh; ++ offset = kLenNumLowSymbols + kLenNumMidSymbols; ++ numBits = kLenNumHighBits; ++ } ++ } ++ RangeDecoderBitTreeDecode(probLen, numBits, len); ++ len += offset; ++ } ++ ++ if (state < 4) ++ { ++ int posSlot; ++ state += kNumLitStates; ++ prob = p + PosSlot + ++ ((len < kNumLenToPosStates ? 
len : kNumLenToPosStates - 1) << ++ kNumPosSlotBits); ++ RangeDecoderBitTreeDecode(prob, kNumPosSlotBits, posSlot); ++ if (posSlot >= kStartPosModelIndex) ++ { ++ int numDirectBits = ((posSlot >> 1) - 1); ++ rep0 = (2 | ((UInt32)posSlot & 1)); ++ if (posSlot < kEndPosModelIndex) ++ { ++ rep0 <<= numDirectBits; ++ prob = p + SpecPos + rep0 - posSlot - 1; ++ } ++ else ++ { ++ numDirectBits -= kNumAlignBits; ++ do ++ { ++ RC_NORMALIZE ++ Range >>= 1; ++ rep0 <<= 1; ++ if (Code >= Range) ++ { ++ Code -= Range; ++ rep0 |= 1; ++ } ++ } ++ while (--numDirectBits != 0); ++ prob = p + Align; ++ rep0 <<= kNumAlignBits; ++ numDirectBits = kNumAlignBits; ++ } ++ { ++ int i = 1; ++ int mi = 1; ++ do ++ { ++ CProb *prob3 = prob + mi; ++ RC_GET_BIT2(prob3, mi, ; , rep0 |= i); ++ i <<= 1; ++ } ++ while(--numDirectBits != 0); ++ } ++ } ++ else ++ rep0 = posSlot; ++ if (++rep0 == (UInt32)(0)) ++ { ++ /* it's for stream version */ ++ len = kLzmaStreamWasFinishedId; ++ break; ++ } ++ } ++ ++ len += kMatchMinLen; ++ #ifdef _LZMA_OUT_READ ++ if (rep0 > distanceLimit) ++ #else ++ if (rep0 > nowPos) ++ #endif ++ return LZMA_RESULT_DATA_ERROR; ++ ++ #ifdef _LZMA_OUT_READ ++ if (dictionarySize - distanceLimit > (UInt32)len) ++ distanceLimit += len; ++ else ++ distanceLimit = dictionarySize; ++ #endif ++ ++ do ++ { ++ #ifdef _LZMA_OUT_READ ++ UInt32 pos = dictionaryPos - rep0; ++ if (pos >= dictionarySize) ++ pos += dictionarySize; ++ previousByte = dictionary[pos]; ++ dictionary[dictionaryPos] = previousByte; ++ if (++dictionaryPos == dictionarySize) ++ dictionaryPos = 0; ++ #else ++ previousByte = outStream[nowPos - rep0]; ++ #endif ++ len--; ++ outStream[nowPos++] = previousByte; ++ } ++ while(len != 0 && nowPos < outSize); ++ } ++ } ++ RC_NORMALIZE; ++ ++ #ifdef _LZMA_OUT_READ ++ vs->Range = Range; ++ vs->Code = Code; ++ vs->DictionaryPos = dictionaryPos; ++ vs->GlobalPos = globalPos + (UInt32)nowPos; ++ vs->DistanceLimit = distanceLimit; ++ vs->Reps[0] = rep0; ++ vs->Reps[1] = rep1; ++ vs->Reps[2] = rep2; ++ vs->Reps[3] = rep3; ++ vs->State = state; ++ vs->RemainLen = len; ++ vs->TempDictionary[0] = tempDictionary[0]; ++ #endif ++ ++ #ifdef _LZMA_IN_CB ++ vs->Buffer = Buffer; ++ vs->BufferLim = BufferLim; ++ #else ++ *inSizeProcessed = (SizeT)(Buffer - inStream); ++ #endif ++ *outSizeProcessed = nowPos; ++ return LZMA_RESULT_OK; ++} +diff --git a/init/LzmaDecode.h b/init/LzmaDecode.h +new file mode 100644 +index 0000000..213062a +--- /dev/null ++++ b/init/LzmaDecode.h +@@ -0,0 +1,131 @@ ++/* ++ LzmaDecode.h ++ LZMA Decoder interface ++ ++ LZMA SDK 4.21 Copyright (c) 1999-2005 Igor Pavlov (2005-06-08) ++ http://www.7-zip.org/ ++ ++ LZMA SDK is licensed under two licenses: ++ 1) GNU Lesser General Public License (GNU LGPL) ++ 2) Common Public License (CPL) ++ It means that you can select one of these two licenses and ++ follow rules of that license. ++ ++ SPECIAL EXCEPTION: ++ Igor Pavlov, as the author of this code, expressly permits you to ++ statically or dynamically link your code (or bind by name) to the ++ interfaces of this file without subjecting your linked code to the ++ terms of the CPL or GNU LGPL. Any modifications or additions ++ to this file, however, are subject to the LGPL or CPL terms. 
++*/ ++ ++#ifndef __LZMADECODE_H ++#define __LZMADECODE_H ++ ++/* #define _LZMA_IN_CB */ ++/* Use callback for input data */ ++ ++/* #define _LZMA_OUT_READ */ ++/* Use read function for output data */ ++ ++/* #define _LZMA_PROB32 */ ++/* It can increase speed on some 32-bit CPUs, ++ but memory usage will be doubled in that case */ ++ ++/* #define _LZMA_LOC_OPT */ ++/* Enable local speed optimizations inside code */ ++ ++/* #define _LZMA_SYSTEM_SIZE_T */ ++/* Use system's size_t. You can use it to enable 64-bit sizes supporting*/ ++ ++#ifndef UInt32 ++#ifdef _LZMA_UINT32_IS_ULONG ++#define UInt32 unsigned long ++#else ++#define UInt32 unsigned int ++#endif ++#endif ++ ++#ifndef SizeT ++#ifdef _LZMA_SYSTEM_SIZE_T ++#include <stddef.h> ++#define SizeT size_t ++#else ++#define SizeT UInt32 ++#endif ++#endif ++ ++#ifdef _LZMA_PROB32 ++#define CProb UInt32 ++#else ++#define CProb unsigned short ++#endif ++ ++#define LZMA_RESULT_OK 0 ++#define LZMA_RESULT_DATA_ERROR 1 ++ ++#ifdef _LZMA_IN_CB ++typedef struct _ILzmaInCallback ++{ ++ int (*Read)(void *object, const unsigned char **buffer, SizeT *bufferSize); ++} ILzmaInCallback; ++#endif ++ ++#define LZMA_BASE_SIZE 1846 ++#define LZMA_LIT_SIZE 768 ++ ++#define LZMA_PROPERTIES_SIZE 5 ++ ++typedef struct _CLzmaProperties ++{ ++ int lc; ++ int lp; ++ int pb; ++ #ifdef _LZMA_OUT_READ ++ UInt32 DictionarySize; ++ #endif ++}CLzmaProperties; ++ ++int LzmaDecodeProperties(CLzmaProperties *propsRes, const unsigned char *propsData, int size); ++ ++#define LzmaGetNumProbs(Properties) (LZMA_BASE_SIZE + (LZMA_LIT_SIZE << ((Properties)->lc + (Properties)->lp))) ++ ++#define kLzmaNeedInitId (-2) ++ ++typedef struct _CLzmaDecoderState ++{ ++ CLzmaProperties Properties; ++ CProb *Probs; ++ ++ #ifdef _LZMA_IN_CB ++ const unsigned char *Buffer; ++ const unsigned char *BufferLim; ++ #endif ++ ++ #ifdef _LZMA_OUT_READ ++ unsigned char *Dictionary; ++ UInt32 Range; ++ UInt32 Code; ++ UInt32 DictionaryPos; ++ UInt32 GlobalPos; ++ UInt32 DistanceLimit; ++ UInt32 Reps[4]; ++ int State; ++ int RemainLen; ++ unsigned char TempDictionary[4]; ++ #endif ++} CLzmaDecoderState; ++ ++#ifdef _LZMA_OUT_READ ++#define LzmaDecoderInit(vs) { (vs)->RemainLen = kLzmaNeedInitId; } ++#endif ++ ++int LzmaDecode(CLzmaDecoderState *vs, ++ #ifdef _LZMA_IN_CB ++ ILzmaInCallback *inCallback, ++ #else ++ const unsigned char *inStream, SizeT inSize, SizeT *inSizeProcessed, ++ #endif ++ unsigned char *outStream, SizeT outSize, SizeT *outSizeProcessed); ++ ++#endif +diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c +index ed652f4..5fd1ec5 100644 +--- a/init/do_mounts_rd.c ++++ b/init/do_mounts_rd.c +@@ -5,7 +5,9 @@ + #include <linux/ext2_fs.h> + #include <linux/romfs_fs.h> + #include <linux/cramfs_fs.h> ++#include <linux/squashfs_fs.h> + #include <linux/initrd.h> ++#include <linux/vmalloc.h> + #include <linux/string.h> + + #include "do_mounts.h" +@@ -31,6 +33,9 @@ static int __init ramdisk_start_setup(char *str) + __setup("ramdisk_start=", ramdisk_start_setup); + + static int __init crd_load(int in_fd, int out_fd); ++#ifdef CONFIG_LZMA_INITRD ++static int __init lzma_rd_load(int in_fd, int out_fd); ++#endif + + /* + * This routine tries to find a RAM disk image to load, and returns the +@@ -39,6 +44,7 @@ static int __init crd_load(int in_fd, int out_fd); + * numbers could not be found. 
+ * + * We currently check for the following magic numbers: ++ * squashfs + * minix + * ext2 + * romfs +@@ -53,6 +59,7 @@ identify_ramdisk_image(int fd, int start_block) + struct ext2_super_block *ext2sb; + struct romfs_super_block *romfsb; + struct cramfs_super *cramfsb; ++ struct squashfs_super_block *squashfsb; + int nblocks = -1; + unsigned char *buf; + +@@ -64,6 +71,7 @@ identify_ramdisk_image(int fd, int start_block) + ext2sb = (struct ext2_super_block *) buf; + romfsb = (struct romfs_super_block *) buf; + cramfsb = (struct cramfs_super *) buf; ++ squashfsb = (struct squashfs_super_block *) buf; + memset(buf, 0xe5, size); + + /* +@@ -82,6 +90,17 @@ identify_ramdisk_image(int fd, int start_block) + nblocks = 0; + goto done; + } ++ /* ++ * handle lzma compressed initrd, returns nblocks=1 as indication ++ */ ++ if( buf[0] < 9 * 5 * 5 && buf[9] == 0 && buf[10] == 0 && buf[11] == 0 ++ && buf[12] == 0 ) ++ { ++ printk( KERN_NOTICE "RAMDISK: LZMA image found at block %d\n", ++ start_block); ++ nblocks = 1; // just a convenient return flag ++ goto done; ++ } + + /* romfs is at block zero too */ + if (romfsb->word0 == ROMSB_WORD0 && +@@ -101,6 +120,18 @@ identify_ramdisk_image(int fd, int start_block) + goto done; + } + ++ /* squashfs is at block zero too */ ++ if (squashfsb->s_magic == SQUASHFS_MAGIC) { ++ printk(KERN_NOTICE ++ "RAMDISK: squashfs filesystem found at block %d\n", ++ start_block); ++ if (squashfsb->s_major < 3) ++ nblocks = (squashfsb->bytes_used_2+BLOCK_SIZE-1)>>BLOCK_SIZE_BITS; ++ else ++ nblocks = (squashfsb->bytes_used+BLOCK_SIZE-1)>>BLOCK_SIZE_BITS; ++ goto done; ++ } ++ + /* + * Read block 1 to test for minix and ext2 superblock + */ +@@ -172,7 +203,22 @@ int __init rd_load_image(char *from) + #endif + goto done; + } +- ++#ifdef CONFIG_LZMA_INITRD ++ /* ++ * handle lzma compressed image ++ */ ++ if ( nblocks == 1 ) ++ { ++ nblocks = 0; ++ if ( lzma_rd_load(in_fd, out_fd) == 0 ) ++ { ++ printk("\nLZMA initrd loaded successfully\n"); ++ goto successful_load; ++ } ++ printk(KERN_NOTICE "LZMA initrd is not in the correct format\n"); ++ goto done; ++ } ++#endif + /* + * NOTE NOTE: nblocks is not actually blocks but + * the number of kibibytes of data to load into a ramdisk. 
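For reference, the LZMA test added above keys off the fixed 13-byte .lzma header: one properties byte encoding lc + lp*9 + pb*45 (always below 9*5*5 = 225), a 4-byte little-endian dictionary size, and an 8-byte uncompressed size whose upper four bytes are zero for anything small enough to be a ramdisk. A minimal standalone sketch of the same check, illustrative only and not part of the patch:

/*
 * Same heuristic as the buf[] test in identify_ramdisk_image(),
 * for a buffer already holding the first 13 bytes of the image.
 */
static int looks_like_lzma(const unsigned char *buf, unsigned long len)
{
	if (len < 13)
		return 0;
	/* byte 0 encodes lc + lp*9 + pb*45, so it is always below 225 */
	if (buf[0] >= 9 * 5 * 5)
		return 0;
	/* bytes 5..12 hold the 64-bit uncompressed size; the high dword
	 * must be zero for a plausible initrd */
	return buf[9] == 0 && buf[10] == 0 && buf[11] == 0 && buf[12] == 0;
}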
+@@ -393,6 +439,134 @@ static void __init error(char *x) + unzip_error = 1; + } + ++#ifdef CONFIG_LZMA_INITRD ++#define _LZMA_IN_CB ++#define _LZMA_OUT_READ ++#include "LzmaDecode.h" ++#include "LzmaDecode.c" ++ ++static int read_byte(void *object, const unsigned char **buffer, SizeT *bufferSize); ++ ++/* ++ * Do the lzma decompression ++ */ ++static int __init lzma_rd_load(int in_fd, int out_fd) ++{ ++ unsigned int i; ++ CLzmaDecoderState state; ++ unsigned char* outputbuffer; ++ unsigned int uncompressedSize = 0; ++ unsigned char* p; ++ unsigned int kBlockSize = 0x10000; ++ unsigned int nowPos = 0; ++ unsigned int outsizeProcessed = 0; ++ int res; ++ ILzmaInCallback callback; ++ ++ insize = 0; /* valid bytes in inbuf */ ++ inptr = 0; /* index of next byte to be processed in inbuf */ ++ exit_code = 0; ++ crd_infd = in_fd; ++ inbuf = kmalloc(INBUFSIZ, GFP_KERNEL); ++ if (inbuf == 0) ++ { ++ printk(KERN_ERR "RAMDISK: Couldn't allocate lzma input buffer\n"); ++ return -1; ++ } ++ ++ callback.Read = read_byte; ++ ++ /* lzma args */ ++ i = get_byte(); ++ state.Properties.lc = i % 9, i = i / 9; ++ state.Properties.lp = i % 5, state.Properties.pb = i / 5; ++ ++ /* read dictionary size */ ++ p = (char*)&state.Properties.DictionarySize; ++ for (i = 0; i < 4; i++) ++ *p++ = get_byte(); ++ ++ /* get uncompressedSize */ ++ p= (char*)&uncompressedSize; ++ for (i = 0; i < 4; i++) ++ *p++ = get_byte(); ++ ++ /* skip big file */ ++ for (i = 0; i < 4; i++) ++ get_byte(); ++ ++ printk( KERN_NOTICE "RAMDISK: LZMA lc=%d,lp=%d,pb=%d,dictSize=%d,origSize=%d\n", ++ state.Properties.lc, state.Properties.lp, state.Properties.pb, state.Properties.DictionarySize, uncompressedSize); ++ outputbuffer = kmalloc(kBlockSize, GFP_KERNEL); ++ if (outputbuffer == 0) { ++ printk(KERN_ERR "RAMDISK: Couldn't allocate lzma output buffer\n"); ++ return -1; ++ } ++ ++ state.Probs = (CProb*)kmalloc( LzmaGetNumProbs(&state.Properties)*sizeof(CProb), GFP_KERNEL); ++ if ( state.Probs == 0) { ++ printk(KERN_ERR "RAMDISK: Couldn't allocate lzma workspace\n"); ++ return -1; ++ } ++ ++#ifdef CONFIG_LZMA_INITRD_KMALLOC_ONLY ++ state.Dictionary = kmalloc( state.Properties.DictionarySize, GFP_KERNEL); ++#else ++ state.Dictionary = vmalloc( state.Properties.DictionarySize); ++#endif ++ if ( state.Dictionary == 0) { ++ printk(KERN_ERR "RAMDISK: Couldn't allocate lzma dictionary\n"); ++ return -1; ++ } ++ ++ printk( KERN_NOTICE "LZMA initrd by Ming-Ching Tiew <mctiew@yahoo.com> " ); ++ ++ LzmaDecoderInit( &state ); ++ ++ for( nowPos =0; nowPos < uncompressedSize ; ) ++ { ++ UInt32 blockSize = uncompressedSize - nowPos; ++ if( blockSize > kBlockSize) ++ blockSize = kBlockSize; ++ res = LzmaDecode( &state, &callback, outputbuffer, blockSize, &outsizeProcessed); ++ if( res != 0 ) { ++ printk( KERN_ERR "RAMDISK: Lzma decode failure\n"); ++ return -1; ++ } ++ if( outsizeProcessed == 0 ) ++ { ++ uncompressedSize = nowPos; ++ printk( KERN_NOTICE "RAMDISK nowPos=%d, uncompressedSize=%d\n", ++ nowPos, uncompressedSize ); ++ break; ++ } ++ sys_write(out_fd, outputbuffer, outsizeProcessed ); ++ nowPos += outsizeProcessed; ++ printk( "."); ++ } ++ ++#ifdef CONFIG_LZMA_INITRD_KMALLOC_ONLY ++ kfree(state.Dictionary); ++#else ++ vfree(state.Dictionary); ++#endif ++ kfree(inbuf); ++ kfree(outputbuffer); ++ kfree(state.Probs); ++ return 0; ++} ++ ++static int read_byte(void *object, const unsigned char **buffer, SizeT *bufferSize) ++{ ++ static unsigned char val; ++ *bufferSize = 1; ++ val = get_byte(); ++ *buffer = &val; ++ return LZMA_RESULT_OK; ++} ++ 
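The header parsing at the top of lzma_rd_load() above pulls the same 13 bytes apart with get_byte(); written out as a standalone helper (hypothetical, not part of the patch) it amounts to:

/*
 * Sketch of lzma_rd_load()'s header parsing: split the properties byte
 * into lc/lp/pb, then read the little-endian dictionary size and the
 * low 32 bits of the uncompressed size.
 */
struct lzma_header {
	int lc, lp, pb;
	unsigned int dict_size;
	unsigned int uncompressed_size;	/* low 32 bits; bytes 9..12 are 0 */
};

static int parse_lzma_header(const unsigned char hdr[13], struct lzma_header *out)
{
	unsigned int props = hdr[0];

	if (props >= 9 * 5 * 5)
		return -1;	/* not a valid properties byte */
	out->lc = props % 9;
	props /= 9;
	out->lp = props % 5;
	out->pb = props / 5;
	out->dict_size = hdr[1] | (hdr[2] << 8) | (hdr[3] << 16)
			| ((unsigned int)hdr[4] << 24);
	out->uncompressed_size = hdr[5] | (hdr[6] << 8) | (hdr[7] << 16)
			| ((unsigned int)hdr[8] << 24);
	return 0;
}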
++#endif /*CONFIG_LZMA_INITRD*/ ++ + static int __init crd_load(int in_fd, int out_fd) + { + int result; +diff --git a/init/initramfs.c b/init/initramfs.c +index 00eff7a..30d32a2 100644 +--- a/init/initramfs.c ++++ b/init/initramfs.c +@@ -6,6 +6,7 @@ + #include <linux/delay.h> + #include <linux/string.h> + #include <linux/syscalls.h> ++#include <linux/vmalloc.h> + + static __initdata char *message; + static void __init error(char *x) +@@ -441,6 +442,118 @@ static void __init flush_window(void) + outcnt = 0; + } + ++#ifdef CONFIG_LZMA_INITRAM_FS ++#define _LZMA_IN_CB ++#define _LZMA_OUT_READ ++#include "LzmaDecode.h" ++#ifndef CONFIG_LZMA_INITRD ++ #include "LzmaDecode.c" ++#endif ++static int read_byte(void *object, const unsigned char **buffer, SizeT *bufferSize) ++{ ++ static unsigned char val; ++ *bufferSize = 1; ++ val = get_byte(); ++ *buffer = &val; ++ return LZMA_RESULT_OK; ++} ++ ++static int __init lzma_unzip(void) ++{ ++ unsigned int i; ++ CLzmaDecoderState state; ++ unsigned char* outputbuffer; ++ unsigned int uncompressedSize = 0; ++ unsigned char* p; ++ unsigned int kBlockSize = 0x10000; ++ unsigned int nowPos = 0; ++ unsigned int outsizeProcessed = 0; ++ int res; ++ ILzmaInCallback callback; ++ ++ callback.Read = read_byte; ++ ++ // lzma args ++ i = get_byte(); ++ state.Properties.lc = i % 9, i = i / 9; ++ state.Properties.lp = i % 5, state.Properties.pb = i / 5; ++ ++ // read dictionary size ++ p = (char*)&state.Properties.DictionarySize; ++ for (i = 0; i < 4; i++) ++ *p++ = get_byte(); ++ ++ // get uncompressedSize ++ p= (char*)&uncompressedSize; ++ for (i = 0; i < 4; i++) ++ *p++ = get_byte(); ++ ++ // skip big file ++ for (i = 0; i < 4; i++) ++ get_byte(); ++ ++ printk( KERN_NOTICE "initramfs: LZMA lc=%d,lp=%d,pb=%d,dictSize=%d,origSize=%d\n", ++ state.Properties.lc,state.Properties.lp,state.Properties.pb,state.Properties.DictionarySize, uncompressedSize); ++ outputbuffer = kmalloc(kBlockSize, GFP_KERNEL); ++ if (outputbuffer == 0) { ++ printk(KERN_ERR "initramfs: Couldn't allocate lzma output buffer\n"); ++ return -1; ++ } ++ ++ state.Probs = (CProb*) kmalloc( LzmaGetNumProbs(&state.Properties)*sizeof(CProb), GFP_KERNEL); ++ if ( state.Probs == 0) { ++ printk(KERN_ERR "initramfs: Couldn't allocate lzma workspace\n"); ++ return -1; ++ } ++ ++#ifdef CONFIG_LZMA_INITRAM_FS_KMALLOC_ONLY ++ state.Dictionary = kmalloc( state.Properties.DictionarySize, GFP_KERNEL); ++#else ++ state.Dictionary = vmalloc( state.Properties.DictionarySize); ++#endif ++ if ( state.Dictionary == 0) { ++ printk(KERN_ERR "initramfs: Couldn't allocate lzma dictionary\n"); ++ return -1; ++ } ++ ++ printk( KERN_NOTICE "LZMA initramfs by Ming-Ching Tiew <mctiew@yahoo.com> " ); ++ ++ LzmaDecoderInit( &state ); ++ ++ for( nowPos =0; nowPos < uncompressedSize ; ) ++ { ++ UInt32 blockSize = uncompressedSize - nowPos; ++ if( blockSize > kBlockSize) ++ blockSize = kBlockSize; ++ res = LzmaDecode( &state, &callback, outputbuffer, blockSize, &outsizeProcessed); ++ if( res != 0 ) { ++ panic( KERN_ERR "initramfs: Lzma decode failure\n"); ++ return -1; ++ } ++ if( outsizeProcessed == 0 ) ++ { ++ uncompressedSize = nowPos; ++ printk( KERN_NOTICE "initramfs: nowPos=%d, uncompressedSize=%d\n", ++ nowPos, uncompressedSize ); ++ break; ++ } ++ flush_buffer(outputbuffer, outsizeProcessed); ++ nowPos += outsizeProcessed; ++ printk( "."); ++ } ++ ++#ifdef CONFIG_LZMA_INITRAM_FS_KMALLOC_ONLY ++ kfree(state.Dictionary); ++#else ++ vfree(state.Dictionary); ++#endif ++ kfree(outputbuffer); ++ kfree(state.Probs); ++ return 0; ++} 
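lzma_unzip() above mirrors lzma_rd_load(): same header parsing, same allocations, same block-wise decode loop, differing only in how output is flushed (flush_buffer() versus sys_write()). A condensed sketch of that shared loop, assuming LzmaDecode.h from this patch built with _LZMA_IN_CB and _LZMA_OUT_READ, a state whose Properties/Probs/Dictionary are already set up as in the callers, and a hypothetical consume() standing in for the write-out step:

#define _LZMA_IN_CB
#define _LZMA_OUT_READ
#include "LzmaDecode.h"

/* Decode 'total' bytes in blocks of at most 'block_size', handing each
 * decoded block to consume().  Returns 0 on success, -1 on error. */
static int decode_all(CLzmaDecoderState *state, ILzmaInCallback *cb,
		      unsigned char *block, UInt32 block_size, UInt32 total,
		      int (*consume)(const unsigned char *buf, SizeT len))
{
	UInt32 pos = 0;
	SizeT done;

	LzmaDecoderInit(state);
	while (pos < total) {
		UInt32 want = total - pos;

		if (want > block_size)
			want = block_size;
		if (LzmaDecode(state, cb, block, want, &done) != LZMA_RESULT_OK)
			return -1;	/* corrupt stream */
		if (done == 0)
			break;		/* stream ended before 'total' bytes */
		if (consume(block, done) != 0)
			return -1;	/* write-out failed */
		pos += done;
	}
	return 0;
}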
++ ++#endif /*CONFIG LZMA_INITRAM_FS*/ ++ + static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only) + { + int written; +@@ -475,12 +588,31 @@ static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only) + inptr = 0; + outcnt = 0; /* bytes in output buffer */ + bytes_out = 0; +- crc = (ulg)0xffffffffL; /* shift register contents */ +- makecrc(); +- gunzip(); +- if (state != Reset) ++ if( inbuf[0] == 037 && ((inbuf[1] == 0213) || (inbuf[1] == 0236))) ++ { ++ printk( KERN_NOTICE "detected gzip initramfs\n"); ++ crc = (ulg)0xffffffffL; /* shift register contents */ ++ makecrc(); ++ gunzip(); ++ if (state != Reset) + error("junk in gzipped archive"); +- this_header = saved_offset + inptr; ++ } ++#ifdef CONFIG_LZMA_INITRAM_FS ++ else if( inbuf[0] < 9 * 5 * 5 && buf[9] == 0 && buf[10] == 0 ++ && buf[11] == 0 && buf[12] == 0 ) ++ { ++ printk( KERN_NOTICE "detected lzma initramfs\n"); ++ lzma_unzip(); ++ } ++#endif ++ else ++ { ++ // skip forward ? ++ crc = (ulg)0xffffffffL; /* shift register contents */ ++ makecrc(); ++ gunzip(); ++ } ++ this_header = saved_offset + inptr; + buf += inptr; + len -= inptr; + } +diff --git a/kernel/Makefile b/kernel/Makefile +index ac6b27a..bd498a2 100644 +--- a/kernel/Makefile ++++ b/kernel/Makefile +@@ -66,7 +66,7 @@ $(obj)/configs.o: $(obj)/config_data.h + # config_data.h contains the same information as ikconfig.h but gzipped. + # Info from config_data can be extracted from /proc/config* + targets += config_data.gz +-$(obj)/config_data.gz: .config FORCE ++$(obj)/config_data.gz: .miniconfig FORCE + $(call if_changed,gzip) + + quiet_cmd_ikconfiggz = IKCFG $@ +diff --git a/kernel/configs.c b/kernel/configs.c +index 8fa1fb2..c8407eb 100644 +--- a/kernel/configs.c ++++ b/kernel/configs.c +@@ -88,7 +88,7 @@ static int __init ikconfig_init(void) + struct proc_dir_entry *entry; + + /* create the current config file */ +- entry = create_proc_entry("config.gz", S_IFREG | S_IRUGO, ++ entry = create_proc_entry("miniconfig.gz", S_IFREG | S_IRUGO, + &proc_root); + if (!entry) + return -ENOMEM; +@@ -104,7 +104,7 @@ static int __init ikconfig_init(void) + + static void __exit ikconfig_cleanup(void) + { +- remove_proc_entry("config.gz", &proc_root); ++ remove_proc_entry("miniconfig.gz", &proc_root); + } + + module_init(ikconfig_init); +diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c +index fe5c7db..a5150e6 100644 +--- a/kernel/time/clocksource.c ++++ b/kernel/time/clocksource.c +@@ -85,8 +85,8 @@ static void clocksource_ratewd(struct clocksource *cs, int64_t delta) + if (delta > -WATCHDOG_TRESHOLD && delta < WATCHDOG_TRESHOLD) + return; + +- printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", +- cs->name, delta); ++/* printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", ++ cs->name, delta); */ + cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); + clocksource_change_rating(cs, 0); + cs->flags &= ~CLOCK_SOURCE_WATCHDOG; +diff --git a/kernel/timer.c b/kernel/timer.c +index dd6c2c1..3a8f485 100644 +--- a/kernel/timer.c ++++ b/kernel/timer.c +@@ -916,8 +916,8 @@ static void change_clocksource(void) + + tick_clock_notify(); + +- printk(KERN_INFO "Time: %s clocksource has been installed.\n", +- clock->name); ++/* printk(KERN_INFO "Time: %s clocksource has been installed.\n", ++ clock->name); */ + } + #else + static inline void change_clocksource(void) { } +diff --git a/miniconfig.sh b/miniconfig.sh +new file mode 100755 +index 0000000..28e7433 +--- /dev/null ++++ b/miniconfig.sh +@@ 
-0,0 +1,2 @@ ++#!/bin/sh -f ++make allnoconfig KCONFIG_ALLCONFIG=.miniconfig +diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib +index fc498fe..e98172c 100644 +--- a/scripts/Makefile.lib ++++ b/scripts/Makefile.lib +@@ -162,4 +162,9 @@ cmd_objcopy = $(OBJCOPY) $(OBJCOPYFLAGS) $(OBJCOPYFLAGS_$(@F)) $< $@ + quiet_cmd_gzip = GZIP $@ + cmd_gzip = gzip -f -9 < $< > $@ + ++# LZMA ++# ++quiet_cmd_lzma = LZMA $@ ++cmd_lzma = lzma e $< $@ -lc7 -lp0 -pb0 2>/dev/null ++ + +diff --git a/scripts/gen_lzma_initramfs_list.sh b/scripts/gen_lzma_initramfs_list.sh +new file mode 100644 +index 0000000..be3ed6a +--- /dev/null ++++ b/scripts/gen_lzma_initramfs_list.sh +@@ -0,0 +1,292 @@ ++#!/bin/bash ++# Copyright (C) Martin Schlemmer <azarah@nosferatu.za.org> ++# Copyright (c) 2006 Sam Ravnborg <sam@ravnborg.org> ++# ++# Released under the terms of the GNU GPL ++# ++# Generate a cpio packed initramfs. It uses gen_init_cpio to generate ++# the cpio archive, and gzip to pack it. ++# The script may also be used to generate the inputfile used for gen_init_cpio ++# This script assumes that gen_init_cpio is located in usr/ directory ++ ++# error out on errors ++set -e ++ ++usage() { ++cat << EOF ++Usage: ++$0 [-o <file>] [-u <uid>] [-g <gid>] { -s | -d | <cpio_source>} ... ++ -o <file> Create lzma initramfs file named <file> using ++ gen_init_cpio and lzma ++ -u <uid> User ID to map to user ID 0 (root). ++ <uid> is only meaningful if <cpio_source> ++ is a directory. ++ -g <gid> Group ID to map to group ID 0 (root). ++ <gid> is only meaningful if <cpio_source> ++ is a directory. ++ <cpio_source> File list or directory for cpio archive. ++ If <cpio_source> is a .cpio file it will be used ++ as direct input to initramfs. ++ -s Create lzma file with small dictionary size ++ -d Output the default cpio list. ++ ++All options except -o and -l may be repeated and are interpreted ++sequentially and immediately. -u and -g states are preserved across ++<cpio_source> options so an explicit "-u 0 -g 0" is required ++to reset the root/group mapping. ++EOF ++} ++ ++list_default_initramfs() { ++ # echo usr/kinit/kinit ++ : ++} ++ ++default_initramfs() { ++ cat <<-EOF >> ${output} ++ # This is a very simple, default initramfs ++ ++ dir /dev 0755 0 0 ++ nod /dev/console 0600 0 0 c 5 1 ++ dir /root 0700 0 0 ++ # file /kinit usr/kinit/kinit 0755 0 0 ++ # slink /init kinit 0755 0 0 ++ EOF ++} ++ ++filetype() { ++ local argv1="$1" ++ ++ # symlink test must come before file test ++ if [ -L "${argv1}" ]; then ++ echo "slink" ++ elif [ -f "${argv1}" ]; then ++ echo "file" ++ elif [ -d "${argv1}" ]; then ++ echo "dir" ++ elif [ -b "${argv1}" -o -c "${argv1}" ]; then ++ echo "nod" ++ elif [ -p "${argv1}" ]; then ++ echo "pipe" ++ elif [ -S "${argv1}" ]; then ++ echo "sock" ++ else ++ echo "invalid" ++ fi ++ return 0 ++} ++ ++list_print_mtime() { ++ : ++} ++ ++print_mtime() { ++ local my_mtime="0" ++ ++ if [ -e "$1" ]; then ++ my_mtime=$(find "$1" -printf "%T@\n" | sort -r | head -n 1) ++ fi ++ ++ echo "# Last modified: ${my_mtime}" >> ${output} ++ echo "" >> ${output} ++} ++ ++list_parse() { ++ echo "$1 \\" ++} ++ ++# for each file print a line in following format ++# <filetype> <name> <path to file> <octal mode> <uid> <gid> ++# for links, devices etc the format differs. 
See gen_init_cpio for details ++parse() { ++ local location="$1" ++ local name="${location/${srcdir}//}" ++ # change '//' into '/' ++ name="${name//\/\///}" ++ local mode="$2" ++ local uid="$3" ++ local gid="$4" ++ local ftype=$(filetype "${location}") ++ # remap uid/gid to 0 if necessary ++ [ "$uid" -eq "$root_uid" ] && uid=0 ++ [ "$gid" -eq "$root_gid" ] && gid=0 ++ local str="${mode} ${uid} ${gid}" ++ ++ [ "${ftype}" == "invalid" ] && return 0 ++ [ "${location}" == "${srcdir}" ] && return 0 ++ ++ case "${ftype}" in ++ "file") ++ str="${ftype} ${name} ${location} ${str}" ++ ;; ++ "nod") ++ local dev_type= ++ local maj=$(LC_ALL=C ls -l "${location}" | \ ++ gawk '{sub(/,/, "", $5); print $5}') ++ local min=$(LC_ALL=C ls -l "${location}" | \ ++ gawk '{print $6}') ++ ++ if [ -b "${location}" ]; then ++ dev_type="b" ++ else ++ dev_type="c" ++ fi ++ str="${ftype} ${name} ${str} ${dev_type} ${maj} ${min}" ++ ;; ++ "slink") ++ local target=$(LC_ALL=C ls -l "${location}" | \ ++ gawk '{print $11}') ++ str="${ftype} ${name} ${target} ${str}" ++ ;; ++ *) ++ str="${ftype} ${name} ${str}" ++ ;; ++ esac ++ ++ echo "${str}" >> ${output} ++ ++ return 0 ++} ++ ++unknown_option() { ++ printf "ERROR: unknown option \"$arg\"\n" >&2 ++ printf "If the filename validly begins with '-', " >&2 ++ printf "then it must be prefixed\n" >&2 ++ printf "by './' so that it won't be interpreted as an option." >&2 ++ printf "\n" >&2 ++ usage >&2 ++ exit 1 ++} ++ ++list_header() { ++ : ++} ++ ++header() { ++ printf "\n#####################\n# $1\n" >> ${output} ++} ++ ++# process one directory (incl sub-directories) ++dir_filelist() { ++ ${dep_list}header "$1" ++ ++ srcdir=$(echo "$1" | sed -e 's://*:/:g') ++ dirlist=$(find "${srcdir}" -printf "%p %m %U %G\n" 2>/dev/null) ++ ++ # If $dirlist is only one line, then the directory is empty ++ if [ "$(echo "${dirlist}" | wc -l)" -gt 1 ]; then ++ ${dep_list}print_mtime "$1" ++ ++ echo "${dirlist}" | \ ++ while read x; do ++ ${dep_list}parse ${x} ++ done ++ fi ++} ++ ++# if only one file is specified and it is .cpio file then use it direct as fs ++# if a directory is specified then add all files in given direcotry to fs ++# if a regular file is specified assume it is in gen_initramfs format ++input_file() { ++ source="$1" ++ if [ -f "$1" ]; then ++ ${dep_list}header "$1" ++ is_cpio="$(echo "$1" | sed 's/^.*\.cpio/cpio/')" ++ if [ $2 -eq 0 -a ${is_cpio} == "cpio" ]; then ++ cpio_file=$1 ++ [ ! 
-z ${dep_list} ] && echo "$1" ++ return 0 ++ fi ++ if [ -z ${dep_list} ]; then ++ print_mtime "$1" >> ${output} ++ cat "$1" >> ${output} ++ else ++ cat "$1" | while read type dir file perm ; do ++ if [ "$type" == "file" ]; then ++ echo "$file \\"; ++ fi ++ done ++ fi ++ elif [ -d "$1" ]; then ++ dir_filelist "$1" ++ else ++ echo " ${prog}: Cannot open '$1'" >&2 ++ exit 1 ++ fi ++} ++ ++prog=$0 ++root_uid=0 ++root_gid=0 ++dep_list= ++cpio_file= ++cpio_list= ++output="/dev/stdout" ++output_file="" ++opt="" ++ ++arg="$1" ++case "$arg" in ++ "-l") # files included in initramfs - used by kbuild ++ dep_list="list_" ++ echo "deps_initramfs := \\" ++ shift ++ ;; ++ "-o") # generate lzma-ed cpio image named $1 ++ shift ++ output_file="$1" ++ cpio_list="$(mktemp ${TMPDIR:-/tmp}/cpiolist.XXXXXX)" ++ output=${cpio_list} ++ shift ++ ;; ++esac ++while [ $# -gt 0 ]; do ++ arg="$1" ++ shift ++ case "$arg" in ++ "-u") # map $1 to uid=0 (root) ++ root_uid="$1" ++ shift ++ ;; ++ "-g") # map $1 to gid=0 (root) ++ root_gid="$1" ++ shift ++ ;; ++ "-s") ++ opt="-d16" ++ ;; ++ "-d") # display default initramfs list ++ default_list="$arg" ++ ${dep_list}default_initramfs ++ ;; ++ "-h") ++ usage ++ exit 0 ++ ;; ++ *) ++ case "$arg" in ++ "-"*) ++ unknown_option ++ ;; ++ *) # input file/dir - process it ++ input_file "$arg" "$#" ++ ;; ++ esac ++ ;; ++ esac ++done ++ ++# If output_file is set we will generate cpio archive and lzma it ++# we are carefull to delete tmp files ++if [ ! -z ${output_file} ]; then ++ if [ -z ${cpio_file} ]; then ++ cpio_tfile="$(mktemp ${TMPDIR:-/tmp}/cpiofile.XXXXXX)" ++ usr/gen_init_cpio ${cpio_list} > ${cpio_tfile} ++ else ++ cpio_tfile=${cpio_file} ++ fi ++ rm ${cpio_list} ++ lzma e ${cpio_tfile} ${output_file} ${opt} ++ [ -z ${cpio_file} ] && rm ${cpio_tfile} ++fi ++exit 0 +diff --git a/shrinkconfig.sh b/shrinkconfig.sh +new file mode 100755 +index 0000000..e7a3df7 +--- /dev/null ++++ b/shrinkconfig.sh +@@ -0,0 +1,79 @@ ++#! /bin/bash ++ ++# shrinkconfig copyright 2006 by Rob Landley <rob@landley.net> ++# Licensed under the GNU General Public License version 2. ++ ++if [ $# -ne 1 ] ++then ++ echo "Turns current .config into a miniconfig file." ++ echo "Usage: shrinkconfig mini.config" ++ exit 1 ++fi ++ ++if [ ! -f .config ] ++then ++ echo "Need a .config file to shrink." ++ exit 1 ++fi ++LENGTH=$(wc -l < .config) ++ ++OUTPUT="$1" ++cp .config "$OUTPUT" ++if [ $? -ne 0 ] ++then ++ echo "Couldn't create $OUTPUT" ++ exit 1 ++fi ++ ++# If we get interrupted, clean up the mess ++ ++KERNELOUTPUT="" ++ ++function cleanup ++{ ++ echo ++ echo "Interrupted." ++ [ ! -z "$KERNELOUTPUT" ] && rm -rf "$KERNELOUTPUT" ++ rm "$OUTPUT" ++ exit 1 ++} ++ ++trap cleanup HUP INT QUIT TERM ++ ++# Since the "O=" argument to make doesn't work recursively, we need to jump ++# through a few hoops to avoid overwriting the .config that we're shrinking. ++ ++# If we're building out of tree, we'll have absolute paths to source and build ++# directories in the Makefile. ++ ++KERNELSRC=$(sed -n -e 's/KERNELSRC[^/]*:=[^/]*//p' Makefile) ++[ -z "$KERNELSRC" ] && KERNELSRC=$(pwd) ++KERNELOUTPUT=`pwd`/.config.minitemp ++ ++mkdir -p "$KERNELOUTPUT" || exit 1 ++ ++echo "Shrinking .config to $OUTPUT..." ++ ++for I in $(seq 1 $LENGTH) ++do ++ echo -n -e "\r"$I/$LENGTH lines $(wc -c < "$OUTPUT") bytes ++ ++ sed -n "${I}!p" "$OUTPUT" > "$KERNELOUTPUT"/.config.test ++ # Do a config with this file ++ make -C "$KERNELSRC" O="$KERNELOUTPUT" allnoconfig KCONFIG_ALLCONFIG="$KERNELOUTPUT"/.config.test > /dev/null ++ ++ # Compare. 
The date changes, so expect a small difference each time. ++ D=$(diff "$KERNELOUTPUT"/.config .config | wc -l) ++ if [ $D -eq 4 ] ++ then ++ mv "$KERNELOUTPUT"/.config.test "$OUTPUT" ++ LENGTH=$[$LENGTH-1] ++ else ++ I=$[$I + 1] ++ fi ++done ++ ++rm -rf "$KERNELOUTPUT" ++ ++# One extra echo to preserve status line. ++echo +diff --git a/usr/Makefile b/usr/Makefile +index 201f27f..8e1f6ea 100644 +--- a/usr/Makefile ++++ b/usr/Makefile +@@ -19,6 +19,7 @@ $(obj)/initramfs_data.o: $(obj)/initramfs_data.cpio.gz FORCE + + hostprogs-y := gen_init_cpio + initramfs := $(CONFIG_SHELL) $(srctree)/scripts/gen_initramfs_list.sh ++lzma_initramfs := $(CONFIG_SHELL) $(srctree)/scripts/gen_lzma_initramfs_list.sh + ramfs-input := $(if $(filter-out "",$(CONFIG_INITRAMFS_SOURCE)), \ + $(shell echo $(CONFIG_INITRAMFS_SOURCE)),-d) + ramfs-args := \ +@@ -36,6 +37,14 @@ endif + quiet_cmd_initfs = GEN $@ + cmd_initfs = $(initramfs) -o $@ $(ramfs-args) $(ramfs-input) + ++ifdef CONFIG_LZMA_INITRAM_FS_SMALLMEM ++quiet_cmd_lzma_initfs = LZRAMFS $@ ++ cmd_lzma_initfs = $(lzma_initramfs) -o $@ $(ramfs-args) -s $(ramfs-input) ++else ++quiet_cmd_lzma_initfs = LZRAMFS $@ ++ cmd_lzma_initfs = $(lzma_initramfs) -o $@ $(ramfs-args) $(ramfs-input) ++endif ++ + targets := initramfs_data.cpio.gz + # do not try to update files included in initramfs + $(deps_initramfs): ; +@@ -48,5 +57,9 @@ $(deps_initramfs): klibcdirs + # 4) arguments to gen_initramfs.sh changes + $(obj)/initramfs_data.cpio.gz: $(obj)/gen_init_cpio $(deps_initramfs) klibcdirs + $(Q)$(initramfs) -l $(ramfs-input) > $(obj)/.initramfs_data.cpio.gz.d ++ifdef CONFIG_LZMA_INITRAM_FS ++ $(call if_changed,lzma_initfs) ++else + $(call if_changed,initfs) ++endif + diff --git a/toolchain/kernel-headers/linux-2.6.21.5-ipmisensors-20070314-1214.patch b/toolchain/kernel-headers/linux-2.6.21.5-ipmisensors-20070314-1214.patch new file mode 100644 index 000000000..aca57c37b --- /dev/null +++ b/toolchain/kernel-headers/linux-2.6.21.5-ipmisensors-20070314-1214.patch @@ -0,0 +1,1914 @@ +diff -rduNp linux-2.6.20.3.orig/drivers/char/ipmi/ipmi_msghandler.c linux-2.6.20.3/drivers/char/ipmi/ipmi_msghandler.c +--- linux-2.6.20.3.orig/drivers/char/ipmi/ipmi_msghandler.c 2007-03-13 19:27:08.000000000 +0100 ++++ linux-2.6.20.3/drivers/char/ipmi/ipmi_msghandler.c 2007-03-14 14:23:02.000000000 +0100 +@@ -1954,6 +1954,24 @@ static void remove_proc_entries(ipmi_smi + #endif /* CONFIG_PROC_FS */ + } + ++/* ++ * Retrieves the bmc_device struct for a given ipmi interface number (or NULL if none). ++ */ ++struct device *ipmi_get_bmcdevice(int if_num) ++{ ++ ipmi_smi_t intf; ++ mutex_lock(&ipmi_interfaces_mutex); ++ list_for_each_entry_rcu(intf, &ipmi_interfaces, link) { ++ if (intf->intf_num == if_num){ ++ mutex_unlock(&ipmi_interfaces_mutex); ++ return &intf->bmc->dev->dev; ++ } ++ } ++ mutex_unlock(&ipmi_interfaces_mutex); ++ ++ return NULL; ++} ++ + static int __find_bmc_guid(struct device *dev, void *data) + { + unsigned char *id = data; +@@ -4183,3 +4201,4 @@ EXPORT_SYMBOL(ipmi_get_my_LUN); + EXPORT_SYMBOL(ipmi_smi_add_proc_entry); + EXPORT_SYMBOL(ipmi_user_set_run_to_completion); + EXPORT_SYMBOL(ipmi_free_recv_msg); ++EXPORT_SYMBOL(ipmi_get_bmcdevice); +diff -rduNp linux-2.6.20.3.orig/drivers/hwmon/Kconfig linux-2.6.20.3/drivers/hwmon/Kconfig +--- linux-2.6.20.3.orig/drivers/hwmon/Kconfig 2007-03-13 19:27:08.000000000 +0100 ++++ linux-2.6.20.3/drivers/hwmon/Kconfig 2007-03-14 14:23:02.000000000 +0100 +@@ -218,6 +218,16 @@ config SENSORS_GL520SM + This driver can also be built as a module. 
If so, the module + will be called gl520sm. + ++config SENSORS_IPMI ++ tristate "IPMI Hardware Monitoring Support" ++ depends on HWMON && IPMI_HANDLER && EXPERIMENTAL ++ help ++ If you say yes here you get support for sensors monitored by ++ an IPMI baseboard management controller (BMC). ++ ++ This driver can also be built as a module. If so, the module ++ will be called ipmisensors. ++ + config SENSORS_IT87 + tristate "ITE IT87xx and compatibles" + depends on HWMON && I2C +diff -rduNp linux-2.6.20.3.orig/drivers/hwmon/Makefile linux-2.6.20.3/drivers/hwmon/Makefile +--- linux-2.6.20.3.orig/drivers/hwmon/Makefile 2007-03-13 19:27:08.000000000 +0100 ++++ linux-2.6.20.3/drivers/hwmon/Makefile 2007-03-14 14:23:02.000000000 +0100 +@@ -28,6 +28,7 @@ obj-$(CONFIG_SENSORS_FSCPOS) += fscpos.o + obj-$(CONFIG_SENSORS_GL518SM) += gl518sm.o + obj-$(CONFIG_SENSORS_GL520SM) += gl520sm.o + obj-$(CONFIG_SENSORS_HDAPS) += hdaps.o ++obj-$(CONFIG_SENSORS_IPMI) += ipmisensors.o + obj-$(CONFIG_SENSORS_IT87) += it87.o + obj-$(CONFIG_SENSORS_K8TEMP) += k8temp.o + obj-$(CONFIG_SENSORS_LM63) += lm63.o +diff -rduNp linux-2.6.20.3.orig/drivers/hwmon/ipmisensors.c linux-2.6.20.3/drivers/hwmon/ipmisensors.c +--- linux-2.6.20.3.orig/drivers/hwmon/ipmisensors.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.20.3/drivers/hwmon/ipmisensors.c 2007-03-14 14:44:42.000000000 +0100 +@@ -0,0 +1,1552 @@ ++/* ++ * ipmisensors.c - lm-sensors/hwmon interface to IPMI sensors. ++ * ++ * Copyright (C) 2004-2006 Yani Ioannou <yani.ioannou@gmail.com> ++ * ++ * Adapted from bmcsensors (lm-sensors for linux 2.4) ++ * bmcsensors (C) Mark D. Studebaker <mdsxyz123@yahoo.com> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ */ ++ ++#include <linux/init.h> ++#include <linux/module.h> ++#include <linux/param.h> ++#include <linux/hwmon.h> ++#include <linux/list.h> ++#include <linux/slab.h> ++#include <linux/device.h> ++#include <linux/hwmon.h> ++ ++#include "ipmisensors.h" ++ ++/****** Function Prototypes ******/ ++static void ipmisensors_send_message(struct ipmisensors_bmc_data *bmc, ++ long msgid, struct kernel_ipmi_msg *msg); ++static void ipmisensors_reserve_sdr(struct ipmisensors_bmc_data *bmc); ++static void ipmisensors_get_sdr(struct ipmisensors_bmc_data *bmc, u16 res_id, ++ u16 record, u8 offset); ++static void ipmisensors_set_sensor_threshold(struct ipmisensors_bmc_data *bmc, ++ u8 number, int value, ++ int lim_index); ++static void ipmisensors_get_reading(struct ipmisensors_bmc_data *bmc, ++ struct sdrdata *sdr); ++static void ipmisensors_msg_handler(struct ipmi_recv_msg *msg, ++ void *user_msg_data); ++static int ipmisensors_intf_registered(int ipmi_intf); ++static int ipmisensors_bmc_registered(struct device *bmc); ++static void ipmisensors_register_bmc(int ipmi_intf, struct ipmi_addr *address); ++static void ipmisensors_unregister_bmc(int ipmi_intf); ++static void ipmisensors_unregister_bmc_all(void); ++static void ipmisensors_new_smi(int if_num, struct device *dev); ++static void ipmisensors_smi_gone(int if_num); ++static void ipmisensors_update_bmc(struct work_struct *); ++static void ipmisensors_cleanup(void); ++ ++/****** Static Vars ******/ ++ ++/* set when module is being removed */ ++static int cleanup = 0; ++ ++/* ipmisensors driver data */ ++static struct ipmisensors_data driver_data = { ++ .driver_name = "bmc", ++ .bmc_data = LIST_HEAD_INIT(driver_data.bmc_data), ++ .interfaces = 0, ++ .smi_watcher = { ++ .owner = THIS_MODULE, ++ .new_smi = ipmisensors_new_smi, ++ .smi_gone = ipmisensors_smi_gone, ++ }, ++ .ipmi_hndlrs = { ++ .ipmi_recv_hndl = ipmisensors_msg_handler, ++ }, ++}; ++ ++/* sensor refresh workqueue */ ++static struct workqueue_struct *ipmisensors_workqueue; ++ ++/****** SDR List Functions ******/ ++/** ++ * Creates a new sdrdata struct, or returns NULL if insufficient memory. ++ */ ++static struct sdrdata *ipmisensors_new_sdr(void) ++{ ++ struct sdrdata *sdr; ++ ++ sdr = kmem_cache_alloc(driver_data.sdrdata_cache, GFP_ATOMIC); ++ if (sdr) { ++ memset(sdr, 0, sizeof(struct sdrdata)); ++ } else { ++ printk(KERN_ERR ++ "ipmisensors: Couldn't allocate memory for new SDR\n"); ++ } ++ ++ return sdr; ++} ++ ++/** ++ * Adds the given sdrdata struct to the given bmc's SDR list. ++ * ++ * @bmc: the bmc to send the message to. ++ */ ++static inline void ipmisensors_add_sdr(struct ipmisensors_bmc_data *bmc, ++ struct sdrdata *sdr) ++{ ++ list_add(&sdr->list, &bmc->sdrs); ++ printk(KERN_DEBUG ++ "ipmisensors: SDR %d: type 0x%02x (%s)\n", ++ bmc->sdr_count, sdr->stype, sdr->id); ++ bmc->sdr_count++; ++} ++ ++/** ++ * Cleanup the sdr list for the given BMC. ++ * ++ * @bmc: the bmc to send the message to. 
++ */ ++static void ipmisensors_sdr_cleanup(struct ipmisensors_bmc_data *bmc) ++{ ++ struct sdrdata *cursor, *next; ++ ++ /* find and free each sdr data struct */ ++ list_for_each_entry_safe(cursor, next, &bmc->sdrs, list) { ++ device_remove_file(bmc->dev, &cursor->attr.dev_attr); ++ device_remove_file(bmc->dev, &cursor->attr_min.dev_attr); ++ device_remove_file(bmc->dev, &cursor->attr_max.dev_attr); ++ device_remove_file(bmc->dev, &cursor->attr_label.dev_attr); ++ ++ kfree(cursor->attr_name); ++ kfree(cursor->attr_max_name); ++ kfree(cursor->attr_min_name); ++ kfree(cursor->attr_label_name); ++ ++ list_del(&cursor->list); ++ kmem_cache_free(driver_data.sdrdata_cache, cursor); ++ } ++} ++ ++/* worker function for workqueue ipmisensors_workqueue */ ++static void ipmisensors_update_bmc(struct work_struct *work) ++{ ++ struct ipmisensors_bmc_data *bmc = container_of(work, struct ipmisensors_bmc_data, update_work.work); ++ ++ /* don't start an update cycle if one already in progress */ ++ if (bmc->state != STATE_READING) { ++ struct sdrdata *cursor, *next; ++ bmc->state = STATE_READING; ++ printk(KERN_DEBUG "ipmisensors: starting update\n"); ++ ++ /* init semaphore to 1 for update cycle */ ++ sema_init(&bmc->update_semaphore, 1); ++ ++ /* update each sdr reading */ ++ list_for_each_entry_safe(cursor, next, &bmc->sdrs, list) { ++ ipmisensors_get_reading(bmc, cursor); ++ } ++ } ++ ++ /* wait for readings (need timeout?) */ ++ down_interruptible(&bmc->update_semaphore); ++ ++ printk(KERN_DEBUG "ipmisensors: update complete\n"); ++ ++ bmc->state = STATE_DONE; ++ ++ /* if the module isn't cleaning up, schedule another update */ ++ if (!cleanup) ++ queue_delayed_work(ipmisensors_workqueue, &bmc->update_work, ++ bmc->update_period * HZ); ++} ++ ++/****** IPMI Message Sending ******/ ++ ++/** ++ * Send a message to the IPMI BMC ++ * ++ * @bmc: the bmc to send the message to. ++ * @msgid: the message id to use. ++ * @msg: the ipmi message structure. ++ */ ++static void ipmisensors_send_message(struct ipmisensors_bmc_data *bmc, ++ long msgid, struct kernel_ipmi_msg *msg) ++{ ++ if (msg->data == NULL) ++ printk(KERN_DEBUG "ipmisensors: Send 0x%x\n", msg->cmd); ++ else ++ printk(KERN_DEBUG "ipmisensors: Send 0x%x 0x%x 0x%x\n", ++ msg->cmd, msg->data[0], msg->data[1]); ++ ++ /* This should be ipmi_request, but Corey had to remove ++ * that due to it being unused at the moment, as soon as ++ * this makes it into the kernel we should request it be re-instated. ++ */ ++ ipmi_request_settime(bmc->user, &bmc->address, msgid, msg, bmc, 0, ++ -1, 0); ++} ++ ++/** ++ * Compose and send a "reserve SDR" message ++ * ++ * @bmc: the bmc to send the message to. ++ */ ++static void ipmisensors_reserve_sdr(struct ipmisensors_bmc_data *bmc) ++{ ++ bmc->tx_message.netfn = IPMI_NETFN_STORAGE_REQUEST; ++ bmc->tx_message.cmd = IPMI_RESERVE_SDR; ++ bmc->tx_message.data_len = 0; ++ bmc->tx_message.data = NULL; ++ ++ ipmisensors_send_message(bmc, bmc->msgid++, &bmc->tx_message); ++} ++ ++/** ++ * Componse and send a "get SDR" message ++ * ++ * @bmc: the bmc to send the message to. 
++ * @res_id: ++ * @record: ++ * @offset: ++ */ ++static void ipmisensors_get_sdr(struct ipmisensors_bmc_data *bmc, u16 res_id, ++ u16 record, u8 offset) ++{ ++ printk(KERN_DEBUG "ipmisensors: Get SDR 0x%x 0x%x 0x%x\n", ++ res_id, record, offset); ++ bmc->tx_message.netfn = IPMI_NETFN_STORAGE_REQUEST; ++ bmc->tx_message.cmd = IPMI_GET_SDR; ++ bmc->tx_message.data_len = 6; ++ bmc->tx_message.data = bmc->tx_msg_data; ++ bmc->tx_msg_data[0] = res_id & 0xff; ++ bmc->tx_msg_data[1] = res_id >> 8; ++ bmc->tx_msg_data[2] = record & 0xff; ++ bmc->tx_msg_data[3] = record >> 8; ++ bmc->tx_msg_data[4] = offset; ++ bmc->tx_msg_data[5] = bmc->ipmi_sdr_partial_size; ++ ++ ipmisensors_send_message(bmc, bmc->msgid++, &bmc->tx_message); ++} ++ ++/** ++ * Compose and send a "set sensor threshold" message ++ * ++ * @bmc: the bmc to send the message to. ++ * @id: the ipmi id number of the sensor. ++ * @value: the new value for the threshold. ++ * @lim_index: the index in the lim[] array for which this value applies. ++ */ ++static void ipmisensors_set_sensor_threshold(struct ipmisensors_bmc_data *bmc, ++ u8 number, int value, ++ int lim_index) ++{ ++ int i; ++ ++ printk(KERN_DEBUG "ipmisensors: Set SDR Threshold %d %d %d\n", ++ number, value, lim_index); ++ bmc->tx_message.netfn = IPMI_NETFN_STORAGE_REQUEST; ++ bmc->tx_message.cmd = IPMI_SET_SENSOR_THRESHOLD; ++ bmc->tx_message.data_len = 8; ++ bmc->tx_message.data = bmc->tx_msg_data; ++ bmc->tx_msg_data[0] = number & 0xff; ++ bmc->tx_msg_data[1] = 0x01 << lim_index; ++ ++ if (lim_index > 5 || lim_index < 0) { ++ printk(KERN_INFO ++ "ipmisensors: Error - ipmisensors_set_sensor_threshold given invalid lim_index\n"); ++ return; ++ } ++ ++ for (i = 2; i < 8; i++) ++ bmc->tx_msg_data[i] = 0x00; ++ ++ bmc->tx_msg_data[lim_index] = value && 0xff; ++ ++ ipmisensors_send_message(bmc, bmc->msgid++, &bmc->tx_message); ++} ++ ++/** ++ * Compose and send a "get sensor reading" message for the given sdr. ++ * ++ * @bmc: the bmc to send the message to. ++ * @sdr: the sdr of the sensor to get the reading for. ++ */ ++static void ipmisensors_get_reading(struct ipmisensors_bmc_data *bmc, ++ struct sdrdata *sdr) ++{ ++ bmc->tx_message.netfn = IPMI_NETFN_SENSOR_EVENT_REQUEST; ++ bmc->tx_message.cmd = IPMI_GET_SENSOR_STATE_READING; ++ bmc->tx_message.data_len = 1; ++ bmc->tx_message.data = bmc->tx_msg_data; ++ bmc->tx_msg_data[0] = sdr->number; ++ bmc->current_sdr = sdr; ++ ++ ipmisensors_send_message(bmc, bmc->msgid++, &bmc->tx_message); ++ down_interruptible(&bmc->update_semaphore); ++} ++ ++/****** IPMI Message Receiving ******/ ++ ++/** ++ * Process an sensor reading response message. ++ * ++ * @bmc: the bmc the message is from ++ * @msg: the IPMI SDR response message ++ */ ++static void ipmisensors_rcv_reading_msg(struct ipmisensors_bmc_data *bmc, ++ struct kernel_ipmi_msg *msg) ++{ ++ struct sdrdata *sdr = bmc->current_sdr; ++ ++ if (sdr == NULL) { ++ printk(KERN_ERR ++ "ipmisensors: Error ipmisensors_rcv_reading with NULL sdr\n"); ++ return; ++ } ++ ++ sdr->reading = msg->data[1]; ++ sdr->status = msg->data[2]; ++ sdr->thresholds = msg->data[3]; ++ ++ printk(KERN_DEBUG "ipmisensors: sensor %d (type %d) reading %d\n", ++ sdr->number, sdr->stype, msg->data[1]); ++ ++ up(&bmc->update_semaphore); ++} ++ ++/** ++ * Unpack based on string type, convert to normal, null terminate. 
++ */ ++static void ipmisensors_sprintf(u8 * to, u8 * from, u8 type, u8 length) ++{ ++ static const u8 *bcdplus = "0123456789 -.:,_"; ++ int i; ++ ++ switch (type) { ++ case 0: /* unicode */ ++ for (i = 0; i < length; i++) ++ *to++ = (*from++ & 0x7f); ++ *to = 0; ++ break; ++ case 1: /* BCD Plus */ ++ for (i = 0; i < length; i++) ++ *to++ = bcdplus[*from++ & 0x0f]; ++ *to = 0; ++ break; ++ case 2: /* packed ascii *//* if not a mult. of 3 this will run over */ ++ for (i = 0; i < length; i += 3) { ++ *to++ = *from & 0x3f; ++ *to++ = *from >> 6 | ((*(from+1) & 0xf) << 2); ++ from++; ++ *to++ = *from >> 4 | ((*(from+1) & 0x3) << 4); ++ from++; ++ *to++ = (*from++ >> 2) & 0x3f; ++ } ++ *to = 0; ++ break; ++ case 3: /* normal */ ++ if (length > 1) ++ memcpy(to, from, length); ++ to[length] = 0; ++ break; ++ } ++} ++ ++/* IPMI V1.5 Section 30 */ ++static const int exps[] = ++ { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000 }; ++ ++/* Return 0 for fan, 2 for temp, 3 for voltage ++ We could make it variable based on the accuracy (= log10(m * 10**k2)); ++ this would work for /proc output, however libsensors resolution ++ is statically set in lib/chips.c */ ++static int decplaces(struct sdrdata *sd) ++{ ++ switch (sd->stype) { ++ case STYPE_TEMP: ++ return 2; ++ case STYPE_CURR: ++ case STYPE_VOLT: ++ return 3; ++ case STYPE_FAN: ++ default: ++ return 0; ++ } ++} ++ ++/* convert a raw value to a reading. IMPI V1.5 Section 30 */ ++static long conv_val(int value, struct sdrdata *sd) ++{ ++ u8 k1, k2; ++ long r; ++ ++ r = value * sd->m; ++ k1 = sd->k & 0x0f; ++ k2 = sd->k >> 4; ++ if (k1 < 8) ++ r += sd->b * exps[k1]; ++ else ++ r += sd->b / exps[16 - k1]; ++ r *= exps[decplaces(sd)]; ++ if (k2 < 8) { ++ if (sd->linear != 7) ++ r *= exps[k2]; ++ else ++ /* this will always truncate to 0: r = 1 / (exps[k2] * r); */ ++ r = 0; ++ } else { ++ if (sd->linear != 7) ++ r /= exps[16 - k2]; ++ else { ++ if (r != 0) ++ /* 1 / x * 10 ** (-m) == 10 ** m / x */ ++ r = exps[16 - k2] / r; ++ else ++ r = 0; ++ } ++ } ++ ++ return r; ++} ++ ++static const char *threshold_text[] = { ++ "upper non-recoverable threshold", ++ "upper critical threshold", ++ "upper non-critical threshold", ++ "lower non-recoverable threshold", ++ "lower critical threshold", ++ "lower non-critical threshold", ++ "positive-going hysteresis", ++ "negative-going hysteresis" /* unused */ ++}; ++ ++/* select two out of the 8 possible readable thresholds, and place indexes into the limits ++ array into lim1 and lim2. Set writable flags */ ++static void ipmisensors_select_thresholds(struct sdrdata *sd) ++{ ++ u8 capab = sd->capab; ++ u16 mask = sd->thresh_mask; ++ int tmp; ++ ++ sd->lim1 = -1; ++ sd->lim2 = -1; ++ sd->lim1_write = 0; ++ sd->lim2_write = 0; ++ ++ if (((capab & 0x0c) == 0x04) || /* readable thresholds ? */ ++ ((capab & 0x0c) == 0x08)) { ++ /* select upper threshold */ ++ if (mask & 0x10) { /* upper crit */ ++ sd->lim1 = 1; ++ if ((capab & 0x0c) == 0x08 && (mask & 0x1000)) ++ sd->lim1_write = 1; ++ } else if (mask & 0x20) { /* upper non-recov */ ++ sd->lim1 = 0; ++ if ((capab & 0x0c) == 0x08 && (mask & 0x2000)) ++ sd->lim1_write = 1; ++ } else if (mask & 0x08) { /* upper non-crit */ ++ sd->lim1 = 2; ++ if ((capab & 0x0c) == 0x08 && (mask & 0x0800)) ++ sd->lim1_write = 1; ++ } ++ ++ /* select lower threshold */ ++ if ((((capab & 0x30) == 0x10) || /* readable ? 
*/ ++ ((capab & 0x30) == 0x20)) && /* pos hyst */ ++ sd->stype == STYPE_TEMP) ++ sd->lim2 = 6; ++ else if (mask & 0x02) { /* lower crit */ ++ sd->lim2 = 4; ++ if ((capab & 0x0c) == 0x08 && (mask & 0x0200)) ++ sd->lim2_write = 1; ++ } else if (mask & 0x04) { /* lower non-recov */ ++ sd->lim2 = 3; ++ if ((capab & 0x0c) == 0x08 && (mask & 0x0400)) ++ sd->lim2_write = 1; ++ } else if (mask & 0x01) { /* lower non-crit */ ++ sd->lim2 = 5; ++ if ((capab & 0x0c) == 0x08 && (mask & 0x0100)) ++ sd->lim2_write = 1; ++ } ++ } ++ ++ /* swap lim1/lim2 if m < 0 or function is 1/x (but not both!) */ ++ if ((sd->m < 0 && sd->linear != 7) || (sd->m >= 0 && sd->linear == 7)) { ++ tmp = sd->lim1; ++ sd->lim1 = sd->lim2; ++ sd->lim2 = tmp; ++ } ++ ++ if (sd->lim1 >= 0) ++ printk(KERN_INFO "ipmisensors: using %s for upper limit\n", ++ threshold_text[sd->lim1]); ++ else ++ printk(KERN_DEBUG "ipmisensors: no readable upper limit\n"); ++ ++ if (sd->lim2 >= 0) ++ printk(KERN_INFO "ipmisensors: using %s for lower limit\n", ++ threshold_text[sd->lim2]); ++ else ++ printk(KERN_DEBUG "ipmisensors: no readable lower limit\n"); ++} ++ ++/************* sysfs callback functions *********/ ++static ssize_t show_update_period(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct ipmisensors_bmc_device_attribute *aattr = ++ to_ipmisensors_bmc_dev_attr(attr); ++ ++ return snprintf(buf, 20, "%d\n", aattr->bmc->update_period); ++} ++ ++static ssize_t store_update_period(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t count) ++{ ++ struct ipmisensors_bmc_device_attribute *aattr = ++ to_ipmisensors_bmc_dev_attr(attr); ++ ++ aattr->bmc->update_period = simple_strtoul(buf, NULL, 10);; ++ return count; ++}; ++ ++static ssize_t show_sensor(struct device *dev, struct device_attribute *attr, ++ char *buf) ++{ ++ struct ipmisensors_device_attribute *sattr = ++ to_ipmisensors_dev_attr(attr); ++ return snprintf(buf, 20, "%ld\n", ++ conv_val(sattr->sdr->reading, sattr->sdr)); ++} ++ ++static ssize_t show_sensor_max(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ long max = 0; ++ struct ipmisensors_device_attribute *sattr = ++ to_ipmisensors_dev_attr(attr); ++ ++ if (sattr->sdr->lim1 >= 0) ++ max = conv_val(sattr->sdr->limits[sattr->sdr->lim1], ++ sattr->sdr); ++ return snprintf(buf, 20, "%ld\n", max); ++} ++ ++static ssize_t show_sensor_min(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ long min = 0; ++ struct ipmisensors_device_attribute *sattr = ++ to_ipmisensors_dev_attr(attr); ++ ++ if (sattr->sdr->lim2 >= 0) ++ min = conv_val(sattr->sdr->limits[sattr->sdr->lim2], ++ sattr->sdr); ++ return snprintf(buf, 20, "%ld\n", min); ++}; ++ ++static ssize_t show_sensor_label(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ u8 label[SDR_MAX_UNPACKED_ID_LENGTH]; ++ struct ipmisensors_device_attribute *sattr = ++ to_ipmisensors_dev_attr(attr); ++ ++ ipmisensors_sprintf(label, sattr->sdr->id, sattr->sdr->string_type, ++ sattr->sdr->id_length); ++ return snprintf(buf, 20, "%s\n", label); ++}; ++ ++static ssize_t store_sensor_max(struct device *dev, ++ struct device_attribute *attr, const char *buf, ++ size_t count) ++{ ++ long val = simple_strtoul(buf, NULL, 10); ++ struct ipmisensors_device_attribute *sattr = ++ to_ipmisensors_dev_attr(attr); ++ printk(KERN_DEBUG "ipmisensors: set max on sensor #%d to %ld", ++ sattr->sdr->number, val); ++ ipmisensors_set_sensor_threshold(sattr->sdr->bmc, sattr->sdr->number, ++ val, 
sattr->sdr->lim1); ++ return count; ++}; ++ ++static ssize_t store_sensor_min(struct device *dev, ++ struct device_attribute *attr, const char *buf, ++ size_t count) ++{ ++ long val = simple_strtoul(buf, NULL, 10); ++ struct ipmisensors_device_attribute *sattr = ++ to_ipmisensors_dev_attr(attr); ++ printk(KERN_DEBUG "ipmisensors: set min on sensor #%d to %ld", ++ sattr->sdr->number, val); ++ ipmisensors_set_sensor_threshold(sattr->sdr->bmc, sattr->sdr->number, ++ val, sattr->sdr->lim2); ++ return count; ++}; ++ ++static ssize_t show_alarms(struct device *dev, struct device_attribute *attr, ++ char *buf) ++{ ++ struct ipmisensors_bmc_device_attribute *aattr = ++ to_ipmisensors_bmc_dev_attr(attr); ++ return snprintf(buf, 20, "%d\n", aattr->bmc->alarms); ++}; ++ ++static ssize_t show_name(struct device *dev, struct device_attribute *attr, ++ char *buf) ++{ ++ return snprintf(buf, 20, "%s\n", driver_data.driver_name); ++}; ++ ++/* work function to build the sysfs entries using the ipmi sdrs */ ++static void ipmisensors_build_sysfs(struct work_struct *work) ++{ ++ int temps = 0, volts = 0, currs = 0, fans = 0; ++ struct sdrdata *cursor, *next; ++ struct ipmisensors_bmc_data *bmc = container_of(work, struct ipmisensors_bmc_data, sysfs_work); ++ ++ /* find and create entries for each sdr data struct */ ++ list_for_each_entry_safe(cursor, next, &bmc->sdrs, list) { ++ u8 id[SDR_MAX_UNPACKED_ID_LENGTH]; ++ ++ cursor->attr_name = ++ (char *)kmalloc(sizeof(char) * MAX_FILENAME_LENGTH, ++ GFP_KERNEL); ++ cursor->attr_max_name = ++ (char *)kmalloc(sizeof(char) * MAX_FILENAME_LENGTH, ++ GFP_KERNEL); ++ cursor->attr_min_name = ++ (char *)kmalloc(sizeof(char) * MAX_FILENAME_LENGTH, ++ GFP_KERNEL); ++ ++ if (cursor->id_length > 0) { ++ cursor->attr_label_name = ++ (char *)kmalloc(sizeof(char) * MAX_FILENAME_LENGTH, ++ GFP_KERNEL); ++ ++ if (cursor->attr_label_name == NULL) { ++ printk(KERN_INFO ++ "ipmisensors: Out of memory (kmalloc failed)"); ++ kfree(cursor->attr_name); ++ kfree(cursor->attr_max_name); ++ kfree(cursor->attr_min_name); ++ return; ++ } ++ } ++ ++ if (cursor->attr_name == NULL || cursor->attr_max_name == NULL ++ || cursor->attr_min_name == NULL ++ || cursor->attr_label_name == NULL) { ++ printk(KERN_INFO ++ "ipmisensors: Out of memory (kmalloc failed)"); ++ kfree(cursor->attr_name); ++ kfree(cursor->attr_max_name); ++ kfree(cursor->attr_min_name); ++ kfree(cursor->attr_label_name); ++ return; ++ } ++ ++ switch (cursor->stype) { ++ case (STYPE_TEMP): ++ /* create the name of the sensor */ ++ snprintf(cursor->attr_name, MAX_FILENAME_LENGTH, ++ "temp%d_input", ++temps); ++ /* create min, max attributes */ ++ snprintf(cursor->attr_max_name, MAX_FILENAME_LENGTH, ++ "temp%d_max", temps); ++ snprintf(cursor->attr_min_name, MAX_FILENAME_LENGTH, ++ "temp%d_min", temps); ++ /* create the label of the sensor */ ++ snprintf(cursor->attr_label_name, MAX_FILENAME_LENGTH, ++ "temp%d_label", temps); ++ break; ++ case (STYPE_VOLT): ++ /* create the name of the sensor */ ++ snprintf(cursor->attr_name, MAX_FILENAME_LENGTH, ++ "in%d_input", ++volts); ++ /* create min, max attributes */ ++ snprintf(cursor->attr_max_name, MAX_FILENAME_LENGTH, ++ "in%d_max", volts); ++ snprintf(cursor->attr_min_name, MAX_FILENAME_LENGTH, ++ "in%d_min", volts); ++ /* create the label of the sensor */ ++ snprintf(cursor->attr_label_name, MAX_FILENAME_LENGTH, ++ "in%d_label", volts); ++ break; ++ case (STYPE_CURR): ++ /* create the name of the sensor */ ++ snprintf(cursor->attr_name, MAX_FILENAME_LENGTH, ++ "curr%d_input", ++currs); 
++ /* create min, max attributes */ ++ sprintf(cursor->attr_max_name, "curr%d_max", currs); ++ sprintf(cursor->attr_min_name, "curr%d_min", currs); ++ /* create the label of the sensor */ ++ snprintf(cursor->attr_label_name, MAX_FILENAME_LENGTH, ++ "curr%d_label", currs); ++ break; ++ case (STYPE_FAN): ++ /* create the name of the sensor */ ++ snprintf(cursor->attr_name, MAX_FILENAME_LENGTH, ++ "fan%d_input", ++fans); ++ /* create min, max attributes */ ++ sprintf(cursor->attr_max_name, "fan%d_max", fans); ++ sprintf(cursor->attr_min_name, "fan%d_min", fans); ++ /* create the label of the sensor */ ++ snprintf(cursor->attr_label_name, MAX_FILENAME_LENGTH, ++ "fan%d_label", fans); ++ break; ++ default: ++ printk(KERN_INFO "ipmisensors: unkown sensor type\n"); ++ continue; ++ } ++ ++ cursor->attr.dev_attr.attr.name = cursor->attr_name; ++ cursor->attr.dev_attr.attr.mode = S_IRUGO; ++ cursor->attr.dev_attr.attr.owner = THIS_MODULE; ++ cursor->attr.dev_attr.show = show_sensor; ++ cursor->attr.dev_attr.store = NULL; ++ cursor->attr.sdr = cursor; ++ ++ cursor->attr_min.dev_attr.attr.name = cursor->attr_min_name; ++ cursor->attr_min.dev_attr.attr.owner = THIS_MODULE; ++ cursor->attr_min.dev_attr.show = show_sensor_min; ++ cursor->attr_min.sdr = cursor; ++ ++ if (cursor->lim2_write) { ++ printk(KERN_INFO ++ "ipmisensors: You have a writable sensor threshold! Send me an e-mail at <yani.ioannou@gmail.com>.\n"); ++ cursor->attr_min.dev_attr.store = store_sensor_min; ++ cursor->attr_min.dev_attr.attr.mode = S_IWUSR | S_IRUGO; ++ } else { ++ cursor->attr_min.dev_attr.store = NULL; ++ cursor->attr_min.dev_attr.attr.mode = S_IRUGO; ++ } ++ ++ cursor->attr_max.dev_attr.attr.name = cursor->attr_max_name; ++ cursor->attr_max.dev_attr.attr.owner = THIS_MODULE; ++ cursor->attr_max.dev_attr.show = show_sensor_max; ++ cursor->attr_max.sdr = cursor; ++ ++ if (cursor->lim1_write) { ++ printk(KERN_INFO ++ "ipmisensors: You have a writable sensor threshold! 
Send me an e-mail at <yani.ioannou@gmail.com>.\n"); ++ cursor->attr_max.dev_attr.store = store_sensor_max; ++ cursor->attr_max.dev_attr.attr.mode = S_IWUSR | S_IRUGO; ++ } else { ++ cursor->attr_max.dev_attr.store = NULL; ++ cursor->attr_max.dev_attr.attr.mode = S_IRUGO; ++ } ++ ++ if (cursor->id_length > 0) { ++ cursor->attr_label.dev_attr.attr.name = ++ cursor->attr_label_name; ++ cursor->attr_label.dev_attr.attr.mode = S_IRUGO; ++ cursor->attr_label.dev_attr.attr.owner = THIS_MODULE; ++ cursor->attr_label.dev_attr.show = show_sensor_label; ++ cursor->attr_label.dev_attr.store = NULL; ++ cursor->attr_label.sdr = cursor; ++ } ++ ++ printk(KERN_INFO ++ "ipmisensors: registering sensor %d: (type 0x%.2x) " ++ "(fmt=%d; m=%d; b=%d; k1=%d; k2=%d; cap=0x%.2x; mask=0x%.4x)\n", ++ cursor->number, cursor->stype, cursor->format, cursor->m, ++ cursor->b, cursor->k & 0xf, cursor->k >> 4, ++ cursor->capab, cursor->thresh_mask); ++ ++ if (cursor->id_length > 0) { ++ ipmisensors_sprintf(id, cursor->id, cursor->string_type, ++ cursor->id_length); ++ switch (cursor->stype) { ++ case (STYPE_TEMP): ++ printk(KERN_INFO ++ "ipmisensors: sensors.conf: label temp%d \"%s\"\n", ++ temps, id); ++ break; ++ case (STYPE_VOLT): ++ printk(KERN_INFO ++ "ipmisensors: sensors.conf: label in%d \"%s\"\n", ++ volts, id); ++ break; ++ case (STYPE_CURR): ++ printk(KERN_INFO ++ "ipmisensors: sensors.conf: label curr%d \"%s\"\n", ++ currs, id); ++ break; ++ case (STYPE_FAN): ++ printk(KERN_INFO ++ "ipmisensors: sensors.conf: label fan%d \"%s\"\n", ++ fans, id); ++ break; ++ } ++ } ++ ++ ipmisensors_select_thresholds(cursor); ++ ++ if (cursor->linear != 0 && cursor->linear != 7) { ++ printk(KERN_INFO ++ "ipmisensors: sensor %d: nonlinear function 0x%.2x unsupported, expect bad results\n", ++ cursor->number, cursor->linear); ++ } ++ ++ if ((cursor->format & 0x03) == 0x02) { ++ printk(KERN_INFO ++ "ipmisensors: sensor %d: 1's complement format unsupported, expect bad results\n", ++ cursor->number); ++ } else if ((cursor->format & 0x03) == 0x03) { ++ printk(KERN_INFO ++ "ipmisensors: sensor %d: threshold sensor only, no readings available", ++ cursor->number); ++ } ++ ++ if (cursor->lim1_write || cursor->lim2_write) ++ cursor->attr.dev_attr.attr.mode = 0644; ++ else ++ cursor->attr.dev_attr.attr.mode = 0444; ++ ++ if (device_create_file(bmc->dev, &cursor->attr.dev_attr) < 0 ++ || device_create_file(bmc->dev, ++ &cursor->attr_min.dev_attr) < 0 ++ || device_create_file(bmc->dev, ++ &cursor->attr_max.dev_attr) < 0 ++ || (cursor->id_length > ++ 0 ? 
device_create_file(bmc->dev, ++ &cursor->attr_label.dev_attr) < ++ 0 : 0) ++ ) { ++ printk(KERN_INFO ++ "ipmisensors: sysfs file creation failed for SDR %d (%s).\n", ++ cursor->number, cursor->id); ++ kfree(cursor->attr_name); ++ kfree(cursor->attr_max_name); ++ kfree(cursor->attr_min_name); ++ kfree(cursor->attr_label_name); ++ return; ++ } ++ } ++ ++ bmc->alarms_attr.dev_attr.attr.name = "alarms"; ++ bmc->alarms_attr.dev_attr.attr.mode = S_IRUGO; ++ bmc->alarms_attr.dev_attr.attr.owner = THIS_MODULE; ++ bmc->alarms_attr.dev_attr.show = show_alarms; ++ bmc->alarms_attr.dev_attr.store = NULL; ++ bmc->alarms_attr.bmc = bmc; ++ ++ if (device_create_file(bmc->dev, &bmc->alarms_attr.dev_attr) < 0) { ++ printk(KERN_INFO ++ "ipmisensors: Failed to create sysfs entry 'alarms'"); ++ return; ++ } ++ ++ bmc->name_attr.attr.name = "name"; ++ bmc->name_attr.attr.mode = S_IRUGO; ++ bmc->name_attr.attr.owner = THIS_MODULE; ++ bmc->name_attr.show = show_name; ++ ++ if (device_create_file(bmc->dev, &bmc->name_attr) < 0) { ++ printk(KERN_INFO ++ "ipmisensors: Failed to create sysfs entry 'name'"); ++ return; ++ } ++ ++ bmc->update_attr.dev_attr.attr.name = "update_period"; ++ bmc->update_attr.dev_attr.attr.mode = S_IWUSR | S_IRUGO; ++ bmc->update_attr.dev_attr.attr.owner = THIS_MODULE; ++ bmc->update_attr.dev_attr.show = show_update_period; ++ bmc->update_attr.dev_attr.store = store_update_period; ++ bmc->update_attr.bmc = bmc; ++ ++ if (device_create_file(bmc->dev, &bmc->update_attr.dev_attr) < 0) { ++ printk(KERN_INFO ++ "ipmisensors: Failed to create sysfs entry 'update_period'"); ++ return; ++ } ++ ++ printk(KERN_INFO ++ "ipmisensors: registered %d temp, %d volt, %d current, %d fan sensors\n", ++ temps, volts, currs, fans); ++ ++ /* This completes the initialization. We can now kickoff the ++ * periodic update of the bmc sensor's values by scheduling ++ * the first work. ++ */ ++ queue_work(ipmisensors_workqueue, &bmc->update_work.work); ++ ++} ++ ++/** ++ * Process an SDR response message, save the SDRs we like in the sdr ++ * list for the given BMC. 
++ * ++ * @bmc: the bmc the message is from ++ * @msg: the IPMI SDR response message ++ */ ++static void ipmisensors_rcv_sdr_msg(struct ipmisensors_bmc_data *bmc, ++ struct kernel_ipmi_msg *msg) ++{ ++ u16 record; ++ int type; ++ int stype; ++ int id_length; ++ int i; ++ int ipmi_ver = 0; ++ unsigned char *data; ++ u8 id[SDR_MAX_UNPACKED_ID_LENGTH]; ++ struct sdrdata *sdr; ++ ++ if (msg->data[0] != 0) { ++ /* cut request in half and try again */ ++ bmc->ipmi_sdr_partial_size /= 2; ++ if (bmc->ipmi_sdr_partial_size < 8) { ++ printk(KERN_INFO ++ "ipmisensors: IPMI buffers too small, giving up\n"); ++ bmc->state = STATE_DONE; ++ return; ++ } ++ printk(KERN_DEBUG ++ "ipmisensors: Reducing SDR request size to %d\n", ++ bmc->ipmi_sdr_partial_size); ++ ++ ipmisensors_get_sdr(bmc, 0, 0, 0); ++ bmc->state = STATE_SDR; ++ return; ++ } ++ if (bmc->ipmi_sdr_partial_size < IPMI_SDR_SIZE) { ++ if (bmc->rx_msg_data_offset == 0) { ++ memcpy(bmc->rx_msg_data, msg->data, ++ bmc->ipmi_sdr_partial_size + 3); ++ bmc->rx_msg_data_offset = ++ bmc->ipmi_sdr_partial_size + 3; ++ } else { ++ memcpy(bmc->rx_msg_data + bmc->rx_msg_data_offset, ++ msg->data + 3, bmc->ipmi_sdr_partial_size); ++ bmc->rx_msg_data_offset += bmc->ipmi_sdr_partial_size; ++ } ++ if (bmc->rx_msg_data_offset > bmc->rx_msg_data[7] + 7) { ++ /* got last chunk */ ++ bmc->rx_msg_data_offset = 0; ++ data = bmc->rx_msg_data; ++ } else { ++ /* get more */ ++ record = ++ (bmc->rx_msg_data[4] << 8) | bmc->rx_msg_data[3]; ++ ipmisensors_get_sdr(bmc, bmc->resid, record, ++ bmc->rx_msg_data_offset - 3); ++ bmc->state = STATE_SDR; ++ return; ++ } ++ } else { ++ /* got it in one chunk */ ++ data = msg->data; ++ } ++ ++ bmc->nextrecord = (data[2] << 8) | data[1]; ++ ++ /* If the ipmi version is 0.9 we have to remap some things. ++ * Yes this is very ugly, but we aren't the ones who ++ * implemented an incomplete spec! ++ */ ++ ipmi_ver = data[5]; ++ ++ type = data[6]; ++ /* known SDR type */ ++ if (type == 1 || type == 2) { ++ stype = data[(ipmi_ver == 0x90 ? 16 : 15)]; ++ /* known sensor type */ ++ if (stype <= STYPE_MAX) { ++ if (data[(ipmi_ver == 0x90 ? 17 : 16)] != 0x01) { ++ if (type == 1) ++ ipmisensors_sprintf(id, &data[51], ++ data[50] >> 6, ++ data[50] & 0x1f); ++ else ++ ipmisensors_sprintf(id, ++ &data[(ipmi_ver == ++ 0x90 ? 30 : ++ 35)], ++ data[(ipmi_ver == ++ 0x90 ? 29 : ++ 34)] >> 6, ++ data[(ipmi_ver == ++ 0x90 ? 29 : ++ 34)] & 0x1f); ++ printk(KERN_INFO ++ "ipmisensors: skipping non-threshold sensor \"%s\"\n", ++ id); ++ } else { ++ /* add entry to sdrd table */ ++ sdr = ipmisensors_new_sdr(); ++ if (!sdr) { ++ printk(KERN_ERR ++ "ipmisensors: could not allocate memory for new SDR"); ++ return; ++ } ++ sdr->bmc = bmc; ++ sdr->stype = stype; ++ sdr->number = data[10]; ++ sdr->capab = data[(ipmi_ver == 0x90 ? 15 : 14)]; ++ sdr->thresh_mask = ++ (((u16) data[(ipmi_ver == 0x90 ? 21 : 22)]) ++ << 8) | data[21]; ++ if (type == 1) { ++ sdr->format = ++ data[(ipmi_ver == ++ 0x90 ? 22 : 24)] >> 6; ++ sdr->linear = ++ data[(ipmi_ver == ++ 0x90 ? 25 : 26)] & 0x7f; ++ sdr->m = ++ data[(ipmi_ver == 0x90 ? 26 : 27)]; ++ sdr->m |= ((u16) ++ (data ++ [(ipmi_ver == ++ 0x90 ? 27 : 28)] ++ & 0xc0)) << 2; ++ if (sdr->m & 0x0200) { ++ /* sign extend */ ++ sdr->m |= 0xfc00; ++ } ++ sdr->b = ++ data[(ipmi_ver == 0x90 ? 28 : 29)]; ++ sdr->b |= ((u16) ++ (data ++ [(ipmi_ver == ++ 0x90 ? 29 : 30)] ++ & 0xc0)) << 2; ++ if (sdr->b & 0x0200) { ++ /* sign extend */ ++ sdr->b |= 0xfc00; ++ } ++ sdr->k = ++ data[(ipmi_ver == 0x90 ? 
31 : 32)]; ++ sdr->nominal = ++ data[(ipmi_ver == 0x90 ? 33 : 34)]; ++ for (i = 0; i < SDR_LIMITS; i++) { ++ /* assume readable */ ++ sdr->limits[i] = ++ data[(ipmi_ver == ++ 0x90 ? 40 : 39) + i]; ++ } ++ sdr->string_type = data[50] >> 6; ++ id_length = data[50] & 0x1f; ++ memcpy(sdr->id, &data[51], id_length); ++ sdr->id_length = id_length; ++ } else { ++ sdr->m = 1; ++ sdr->b = 0; ++ sdr->k = 0; ++ sdr->string_type = ++ data[(ipmi_ver == ++ 0x90 ? 29 : 34)] >> 6; ++ id_length = data[34] & 0x1f; ++ if (id_length > 0) { ++ memcpy(sdr->id, ++ &data[(ipmi_ver == ++ 0x90 ? 30 : 35)], ++ id_length); ++ } ++ sdr->id_length = id_length; ++ /* limits?? */ ++ if (ipmi_ver == 0x90) { ++ memcpy(sdr->id, ++ &data[30], id_length); ++ sdr->id_length = id_length; ++ } ++ } ++ ipmisensors_add_sdr(bmc, sdr); ++ } ++ } ++ /* peek at the other SDR types */ ++ } else if (type == 0x10 || type == 0x11 || type == 0x12) { ++ ipmisensors_sprintf(id, data + 19, data[18] >> 6, ++ data[18] & 0x1f); ++ if (type == 0x10) { ++ printk(KERN_INFO ++ "ipmisensors: Generic Device acc=0x%x; slv=0x%x; lun=0x%x; type=0x%x; \"%s\"\n", ++ data[8], data[9], data[10], data[13], id); ++ } else if (type == 0x11) { ++ printk(KERN_INFO ++ "ipmisensors: FRU Device acc=0x%x; slv=0x%x; log=0x%x; ch=0x%x; type=0x%x; \"%s\"\n", ++ data[8], data[9], data[10], data[11], data[13], ++ id); ++ } else { ++ printk(KERN_INFO ++ "ipmisensors: Mgmt Ctllr Device slv=0x%x; \"%s\"\n", ++ data[8], id); ++ } ++ } else if (type == 0x14) { ++ printk(KERN_INFO ++ "ipmisensors: Message Channel Info Records:\n"); ++ for (i = 0; i < 8; i++) { ++ printk(KERN_INFO "ipmisensors: Channel %d info 0x%x\n", ++ i, data[9 + i]); ++ } ++ } else { ++ printk(KERN_INFO "ipmisensors: Skipping SDR type 0x%x\n", type); ++ } ++ if (ipmi_ver != 0x90) { ++ if (bmc->nextrecord >= 6224) { ++ /*YJ stop sensor scan on poweredge 1750 */ ++ bmc->nextrecord = 0xffff; ++ } ++ } ++ ++ if (bmc->nextrecord == 0xFFFF) { ++ if (bmc->sdr_count == 0) { ++ printk(KERN_INFO ++ "ipmisensors: No recognized sensors found.\n"); ++ bmc->state = STATE_DONE; ++ } else { ++ printk(KERN_INFO "ipmisensors: all sensors detected\n"); ++ bmc->state = STATE_SYSTABLE; ++ ++ /* Schedule sysfs build/registration work */ ++ INIT_WORK(&bmc->sysfs_work, ipmisensors_build_sysfs); ++ queue_work(ipmisensors_workqueue, &bmc->sysfs_work); ++ } ++ } else { ++ ipmisensors_get_sdr(bmc, 0, bmc->nextrecord, 0); ++ bmc->state = STATE_SDR; ++ } ++} ++ ++/** ++ * Process incoming messages based on internal state ++ * ++ * @bmc: the bmc the message is from. ++ * @msg: the ipmi message to process. 
++ */ ++static void ipmisensors_rcv_msg(struct ipmisensors_bmc_data *bmc, ++ struct kernel_ipmi_msg *msg) ++{ ++ switch (bmc->state) { ++ case STATE_INIT: ++ case STATE_RESERVE: ++ bmc->resid = (((u16) msg->data[2]) << 8) | msg->data[1]; ++ ++ printk(KERN_DEBUG "ipmisensors: Got first resid 0x%.4x\n", ++ bmc->resid); ++ ++ ipmisensors_get_sdr(bmc, 0, 0, 0); ++ bmc->state = STATE_SDR; ++ break; ++ ++ case STATE_SDR: ++ case STATE_SDRPARTIAL: ++ ipmisensors_rcv_sdr_msg(bmc, msg); ++ break; ++ ++ case STATE_READING: ++ ipmisensors_rcv_reading_msg(bmc, msg); ++ break; ++ ++ case STATE_UNCANCEL: ++ bmc->resid = (((u16) msg->data[2]) << 8) | msg->data[1]; ++ ++ printk(KERN_DEBUG "ipmisensors: Got new resid 0x%.4x\n", ++ bmc->resid); ++ ++ bmc->rx_msg_data_offset = 0; ++ ipmisensors_get_sdr(bmc, 0, bmc->nextrecord, 0); ++ bmc->state = STATE_SDR; ++ break; ++ ++ case STATE_DONE: ++ case STATE_SYSTABLE: ++ break; ++ default: ++ bmc->state = STATE_INIT; ++ } ++} ++ ++/** ++ * Callback to handle a received IPMI message from a given BMC. ++ * ++ * @msg: the received message. ++ * @handler_data: a pointer to the particular bmc ipmisensors_bmc_data struct. ++ */ ++static void ipmisensors_msg_handler(struct ipmi_recv_msg *msg, ++ void *user_msg_data) ++{ ++ struct ipmisensors_bmc_data *bmc = ++ (struct ipmisensors_bmc_data *)user_msg_data; ++ ++ if (msg->msg.data[0] != 0) ++ printk(KERN_WARNING ++ "ipmisensors: Error 0x%x on cmd 0x%x/0x%x\n", ++ msg->msg.data[0], msg->msg.netfn, msg->msg.cmd); ++ ++ if (bmc != NULL && ipmisensors_intf_registered(bmc->interface_id)) { ++ if (bmc->state == STATE_SDR && ++ msg->msg.data[0] == IPMI_INVALID_RESERVATION_ID) { ++ /* reservation cancelled, get new resid */ ++ if (++bmc->errorcount > 275) { ++ printk(KERN_ERR ++ "ipmisensors: Too many reservations cancelled, giving up\n"); ++ bmc->state = STATE_DONE; ++ } else { ++ printk(KERN_DEBUG ++ "ipmisensors: resid 0x%04x cancelled, getting new one\n", ++ bmc->resid); ++ ++ ipmisensors_reserve_sdr(bmc); ++ bmc->state = STATE_UNCANCEL; ++ } ++ } else if (msg->msg.data[0] != IPMI_CC_NO_ERROR && ++ msg->msg.data[0] != IPMI_ERR_RETURNING_REQ_BYTES && ++ msg->msg.data[0] != IPMI_ERR_PROVIDING_RESPONSE) { ++ printk(KERN_ERR ++ "ipmisensors: Error 0x%x on cmd 0x%x/0x%x; state = %d; probably fatal.\n", ++ msg->msg.data[0], msg->msg.netfn & 0xfe, ++ msg->msg.cmd, bmc->state); ++ } else { ++ printk(KERN_DEBUG "ipmisensors: received message\n"); ++ ipmisensors_rcv_msg(bmc, &msg->msg); ++ } ++ ++ } else { ++ printk(KERN_WARNING ++ "ipmisensors: Response for non-registered BMC\n"); ++ if (bmc != NULL) ++ printk(KERN_DEBUG "ipmisensors: BMC ID: %d\n", ++ bmc->interface_id); ++ else ++ printk(KERN_DEBUG "ipmisensors: BMC NULL!\n"); ++ } ++ ++ ipmi_free_recv_msg(msg); ++} ++ ++/****** IPMI Interface Initialization ******/ ++ ++/** ++ * Return true if the given ipmi interface has been registered. ++ * ++ * @ipmi_intf: The IPMI interface number. ++ */ ++static int ipmisensors_intf_registered(int ipmi_intf) ++{ ++ int found = 0; ++ struct ipmisensors_bmc_data *cursor, *next; ++ ++ /* find and free the ipmisensors_bmc_data struct */ ++ list_for_each_entry_safe(cursor, next, &driver_data.bmc_data, list) { ++ if (cursor->interface_id == ipmi_intf) { ++ found++; ++ } ++ } ++ ++ return found; ++} ++ ++/** ++ * Return true if the given BMC has been registered. ++ * ++ * @bmc: The BMC device. 
++ */ ++static int ipmisensors_bmc_registered(struct device *bmc) ++{ ++ int found = 0; ++ struct ipmisensors_bmc_data *cursor, *next; ++ ++ /* find and free the ipmisensors_bmc_data struct */ ++ list_for_each_entry_safe(cursor, next, &driver_data.bmc_data, list) { ++ if (cursor->dev == bmc) { ++ found++; ++ } ++ } ++ ++ return found; ++} ++ ++/** ++ * Register new IPMI BMC interface. Interface indpendent callback created ++ * for flexibility in adding new types of interface callbacks in future. ++ * ++ * @ipmi_intf: The IPMI interface number. ++ */ ++static void ipmisensors_register_bmc(int ipmi_intf, struct ipmi_addr *address) ++{ ++ int error; ++ ++ /* allocate a new ipmisensors_bmc_data struct */ ++ ++ struct ipmisensors_bmc_data *bmc = (struct ipmisensors_bmc_data *) ++ kmalloc(sizeof(struct ipmisensors_bmc_data), GFP_KERNEL); ++ ++ /* initialize members */ ++ INIT_LIST_HEAD(&bmc->sdrs); ++ bmc->interface_id = ipmi_intf; ++ ++ bmc->address = *address; ++ ++ bmc->sdr_count = 0; ++ bmc->msgid = 0; ++ bmc->ipmi_sdr_partial_size = IPMI_CHUNK_SIZE; ++ bmc->state = STATE_INIT; ++ bmc->errorcount = 0; ++ bmc->rx_msg_data_offset = 0; ++ bmc->dev = ipmi_get_bmcdevice(ipmi_intf); ++ ++ /* default to 3 second min update interval */ ++ bmc->update_period = 3; ++ ++ if (bmc->dev == NULL) { ++ printk(KERN_ERR ++ "ipmisensors: Error, couldn't get BMC device for interface %d\n", ++ bmc->interface_id); ++ kfree(bmc); ++ return; ++ } ++ ++ /* Create IPMI messaging interface user */ ++ error = ipmi_create_user(bmc->interface_id, &driver_data.ipmi_hndlrs, ++ bmc, &bmc->user); ++ if (error < 0) { ++ printk(KERN_ERR ++ "ipmisensors: Error, unable to register user with ipmi interface %d\n", ++ bmc->interface_id); ++ kfree(bmc); ++ return; ++ } ++ ++ /* Register the BMC as a HWMON class device */ ++ bmc->class_dev = hwmon_device_register(bmc->dev); ++ ++ if (IS_ERR(bmc->class_dev)) { ++ printk(KERN_ERR ++ "ipmisensors: Error, unable to register hwmon class device for interface %d\n", ++ bmc->interface_id); ++ kfree(bmc); ++ return; ++ } ++ ++ /* Register the BMC in the driver */ ++ if (ipmisensors_bmc_registered(bmc->dev)) { ++ printk(KERN_ERR ++ "ipmisensors: BMC on interface %d already registered\n", ++ bmc->interface_id); ++ hwmon_device_unregister(bmc->class_dev); ++ kfree(bmc); ++ return; ++ } ++ ++ ipmi_get_version(bmc->user, &bmc->ipmi_version_major, ++ &bmc->ipmi_version_minor); ++ ++ /* finally add the new bmc data to the bmc data list */ ++ list_add_tail(&bmc->list, &driver_data.bmc_data); ++ driver_data.interfaces++; ++ ++ printk(KERN_INFO ++ "ipmisensors: Registered IPMI %d.%d BMC over interface %d\n", ++ bmc->ipmi_version_major, ++ bmc->ipmi_version_minor, bmc->interface_id); ++ ++ /* Send a reserve SDR command to the bmc */ ++ ipmisensors_reserve_sdr(bmc); ++ ++ /* initialize the bmc's update work struct */ ++ INIT_DELAYED_WORK(&bmc->update_work, ipmisensors_update_bmc); ++} ++ ++/** ++ * Callback for when an IPMI BMC is gone. Interface indpendent callback created ++ * for flexibility in adding new types of interface callbacks in future. ++ * ++ * @ipmi_intf: The IPMI interface number. 
++ */ ++static void ipmisensors_unregister_bmc(int ipmi_intf) ++{ ++ struct ipmisensors_bmc_data *cursor, *next; ++ ++ /* find and free the ipmisensors_bmc_data struct */ ++ list_for_each_entry_safe(cursor, next, &driver_data.bmc_data, list) { ++ if (cursor->interface_id == ipmi_intf) { ++ list_del(&cursor->list); ++ printk(KERN_DEBUG ++ "ipmisensors: cancelling queued work\n"); ++ /* cancel update work queued for this bmc */ ++ cancel_delayed_work(&cursor->update_work); ++ printk(KERN_DEBUG ++ "ipmisensors: waiting for update to finish\n"); ++ /* wait for readings to finish */ ++ while (cursor->state != STATE_DONE) ; ++ ++ device_remove_file(cursor->dev, ++ &cursor->alarms_attr.dev_attr); ++ device_remove_file(cursor->dev, ++ &cursor->update_attr.dev_attr); ++ hwmon_device_unregister(cursor->class_dev); ++ ipmisensors_sdr_cleanup(cursor); ++ ipmi_destroy_user(cursor->user); ++ ++ printk(KERN_INFO ++ "ipmisensors: Unegistered IPMI interface %d\n", ++ cursor->interface_id); ++ ++ kfree(cursor); ++ driver_data.interfaces--; ++ } ++ } ++ ++} ++ ++/** ++ * Unregister all registered bmcs. ++ */ ++static void ipmisensors_unregister_bmc_all(void) ++{ ++ struct ipmisensors_bmc_data *cursor, *next; ++ ++ /* find and free the ipmisensors_bmc_data struct */ ++ list_for_each_entry_safe(cursor, next, &driver_data.bmc_data, list) { ++ list_del(&cursor->list); ++ ++ /* cancel update work queued for this bmc */ ++ printk(KERN_DEBUG "ipmisensors: cancelling queued work\n"); ++ cancel_delayed_work(&cursor->update_work); ++ ++ printk(KERN_DEBUG ++ "ipmisensors: waiting for update to finish\n"); ++ /* wait for readings to finish */ ++ while (cursor->state != STATE_DONE) ; ++ ++ device_remove_file(cursor->dev, &cursor->alarms_attr.dev_attr); ++ device_remove_file(cursor->dev, &cursor->update_attr.dev_attr); ++ hwmon_device_unregister(cursor->class_dev); ++ ipmisensors_sdr_cleanup(cursor); ++ ipmi_destroy_user(cursor->user); ++ ++ printk(KERN_INFO ++ "ipmisensors: Unegistered IPMI interface %d\n", ++ cursor->interface_id); ++ ++ kfree(cursor); ++ } ++ ++ driver_data.interfaces = 0; ++} ++ ++/** ++ * Callback for when a new IPMI SMI type interface is found. ++ * ++ * @if_num: The IPMI interface number. ++ */ ++static void ipmisensors_new_smi(int if_num, struct device *dev) ++{ ++ struct ipmi_addr smi_address = { ++ IPMI_SYSTEM_INTERFACE_ADDR_TYPE, ++ IPMI_BMC_CHANNEL, ++ {0}, ++ }; ++ ++ /* calls the generic new interface function */ ++ ipmisensors_register_bmc(if_num, &smi_address); ++} ++ ++/** ++ * Callback for when an exisiting IPMI SMI type interface is gone. ++ * ++ * @if_num: The IPMI interface number. ++ */ ++static void ipmisensors_smi_gone(int if_num) ++{ ++ if (driver_data.interfaces > 0) { ++ ipmisensors_unregister_bmc(if_num); ++ } ++} ++ ++/** ++ * Initialize the module. 
++ */ ++static int __init ipmisensors_init(void) ++{ ++ int error; ++ printk(KERN_INFO "ipmisensors - IPMI BMC sensors interface\n"); ++ ++ /* init cache managers */ ++ driver_data.sdrdata_cache = ++ kmem_cache_create("ipmisensors_sdrdata", sizeof(struct sdrdata), 0, ++ 0, NULL, NULL); ++ driver_data.sysfsattr_cache = ++ kmem_cache_create("ipmisensors_sysfsattr", ++ sizeof(struct ipmisensors_device_attribute), 0, 0, ++ NULL, NULL); ++ ++ if (!driver_data.sdrdata_cache || !driver_data.sysfsattr_cache) { ++ if (driver_data.sdrdata_cache) ++ kmem_cache_destroy(driver_data.sdrdata_cache); ++ if (driver_data.sysfsattr_cache) ++ kmem_cache_destroy(driver_data.sysfsattr_cache); ++ return -ENOMEM; ++ } ++ ++ /* register IPMI interface callback(s) */ ++ error = ipmi_smi_watcher_register(&driver_data.smi_watcher); ++ if (error) { ++ printk(KERN_WARNING ++ "ipmisensors: can't register smi watcher\n"); ++ return error; ++ } ++ ++ /* create work queue, keep it simple, single-threaded */ ++ ipmisensors_workqueue = ++ create_singlethread_workqueue("ipmisensors_workqueue"); ++ ++ return 0; ++} ++ ++/** ++ * Cleanup ++ */ ++static void ipmisensors_cleanup(void) ++{ ++ /* start cleanup */ ++ cleanup = 1; ++ ++ /* unregister bmcs */ ++ printk(KERN_DEBUG "ipmisensors: unregister bmcs\n"); ++ ipmi_smi_watcher_unregister(&driver_data.smi_watcher); ++ ipmisensors_unregister_bmc_all(); ++ ++ /* flush & destroy work queue */ ++ printk(KERN_DEBUG "ipmisensors: destroy workqueue\n"); ++ flush_workqueue(ipmisensors_workqueue); ++ destroy_workqueue(ipmisensors_workqueue); ++ ++ /* remove cache managers */ ++ if (driver_data.sdrdata_cache) ++ kmem_cache_destroy(driver_data.sdrdata_cache); ++ if (driver_data.sysfsattr_cache) ++ kmem_cache_destroy(driver_data.sysfsattr_cache); ++} ++ ++/** ++ * Cleanup and exit the module ++ */ ++static void __exit ipmisensors_exit(void) ++{ ++ ipmisensors_cleanup(); ++ printk(KERN_DEBUG "ipmisensors: cleanup finished\n"); ++} ++ ++MODULE_AUTHOR("Yani Ioannou <yani.ioannou@gmail.com>"); ++MODULE_DESCRIPTION("IPMI BMC sensors"); ++MODULE_LICENSE("GPL"); ++ ++module_init(ipmisensors_init); ++module_exit(ipmisensors_exit); +diff -rduNp linux-2.6.20.3.orig/drivers/hwmon/ipmisensors.h linux-2.6.20.3/drivers/hwmon/ipmisensors.h +--- linux-2.6.20.3.orig/drivers/hwmon/ipmisensors.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.20.3/drivers/hwmon/ipmisensors.h 2007-03-14 14:41:23.000000000 +0100 +@@ -0,0 +1,240 @@ ++/* ++ * ipmisensors.h - lm_sensors interface to IPMI sensors. ++ * ++ * Copyright (C) 2004-2006 Yani Ioannou <yani.ioannou@gmail.com> ++ * ++ * Adapted from bmcsensors (lm-sensors for linux 2.4) ++ * bmcsensors (C) Mark D. Studebaker <mdsxyz123@yahoo.com> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++ */ ++ ++#include <linux/ipmi.h> ++#include <linux/list.h> ++#include <linux/slab.h> ++#include <linux/workqueue.h> ++ ++/* SDR defs */ ++#define STYPE_TEMP 0x01 ++#define STYPE_VOLT 0x02 ++#define STYPE_CURR 0x03 ++#define STYPE_FAN 0x04 ++ ++#define SDR_LIMITS 8 ++#define SDR_MAX_ID_LENGTH 16 ++#define SDR_MAX_UNPACKED_ID_LENGTH ((SDR_MAX_ID_LENGTH * 4 / 3) + 2) ++ ++/* the last sensor type we are interested in */ ++#define STYPE_MAX 4 ++ ++#define IPMI_SDR_SIZE 67 ++#define IPMI_CHUNK_SIZE 16 ++ ++#define MAX_FILENAME_LENGTH 30 ++ ++struct ipmisensors_device_attribute { ++ struct device_attribute dev_attr; ++ struct sdrdata *sdr; ++}; ++#define to_ipmisensors_dev_attr(_dev_attr) \ ++ container_of(_dev_attr, struct ipmisensors_device_attribute, dev_attr) ++ ++#define IPMISENSORS_DEVICE_ATTR(_name,_mode,_show,_store,_index) \ ++struct ipmisensors_attribute sensor_dev_attr_##_name = { \ ++ .dev_attr = __ATTR(_name,_mode,_show,_store), \ ++ .index = _index, \ ++} ++ ++struct ipmisensors_bmc_device_attribute { ++ struct device_attribute dev_attr; ++ struct ipmisensors_bmc_data *bmc; ++}; ++#define to_ipmisensors_bmc_dev_attr(_dev_attr) \ ++ container_of(_dev_attr, struct ipmisensors_bmc_device_attribute, dev_attr) ++ ++/** ++ * &struct_sdrdata stores the IPMI Sensor Data Record (SDR) data, as recieved from the BMC, along with the corresponding sysfs attributes ++ */ ++struct sdrdata { ++ struct list_head list; ++ /* retrieved from SDR, not expected to change */ ++ /* Sensor Type Code */ ++ u8 stype; ++ u8 number; ++ /* Sensor Capability Code */ ++ u8 capab; ++ u16 thresh_mask; ++ u8 format; ++ u8 linear; ++ s16 m; ++ s16 b; ++ u8 k; ++ u8 nominal; ++ u8 limits[SDR_LIMITS]; ++ /* index into limits for reported upper and lower limit */ ++ int lim1, lim2; ++ u8 lim1_write, lim2_write; ++ u8 string_type; ++ u8 id_length; ++ u8 id[SDR_MAX_ID_LENGTH]; ++ /* retrieved from reading */ ++ u8 reading; ++ u8 status; ++ u8 thresholds; ++ /* sensor's bmc */ ++ struct ipmisensors_bmc_data *bmc; ++ /* sysfs entries */ ++ struct ipmisensors_device_attribute attr; ++ char *attr_name; ++ struct ipmisensors_device_attribute attr_min; ++ char *attr_min_name; ++ struct ipmisensors_device_attribute attr_max; ++ char *attr_max_name; ++ struct ipmisensors_device_attribute attr_label; ++ char *attr_label_name; ++ ++}; ++ ++/** ++ * &struct_ipmisensors_data stores the data for the ipmisensors driver. ++ */ ++struct ipmisensors_data { ++ /* Driver struct */ ++ char *driver_name; ++ ++ /* Linked list of ipmisensors_bmc_data structs, one for each BMC */ ++ struct list_head bmc_data; ++ ++ /* Number of ipmi interfaces (and hence ipmisensors_data structs). */ ++ int interfaces; ++ ++ /* IPMI kernel interface - SMI watcher */ ++ struct ipmi_smi_watcher smi_watcher; ++ ++ /* IPMI kernel interface - user handlers */ ++ struct ipmi_user_hndl ipmi_hndlrs; ++ ++ /* Cache manager for sdrdata cache */ ++ struct kmem_cache *sdrdata_cache; ++ ++ /* Cache manager for ipmi_sensor_device_attribute cache */ ++ struct kmem_cache *sysfsattr_cache; ++}; ++ ++/** ++ * &states: enumeration of state codes for a bmc specific ipmisensors ++ */ ++enum states { ++ STATE_INIT, ++ STATE_RESERVE, ++ STATE_SDR, ++ STATE_SDRPARTIAL, ++ STATE_READING, ++ STATE_UNCANCEL, ++ STATE_SYSTABLE, ++ STATE_DONE ++}; ++ ++/** ++ * &struct_ipmisensors_bmc_data stores the data for a particular IPMI BMC. 
++ */ ++struct ipmisensors_bmc_data { ++ struct list_head list; ++ ++ /* The IPMI interface number */ ++ int interface_id; ++ ++ /* The IPMI address */ ++ struct ipmi_addr address; ++ ++ /* List of sdrdata structs (sdrs) recieved from the BMC */ ++ struct list_head sdrs; ++ ++ /* Count of the number of sdrs stored in the sdr list */ ++ int sdr_count; ++ ++ /* next message id */ ++ int msgid; ++ ++ /* The ipmi interface 'user' used to access this particular bmc */ ++ ipmi_user_t user; ++ ++ /* BMC IPMI Version (major) */ ++ unsigned char ipmi_version_major; ++ ++ /* BMC IPMI Version (minor) */ ++ unsigned char ipmi_version_minor; ++ ++ /* The size of the SDR request message */ ++ int ipmi_sdr_partial_size; ++ ++ /* transmit message buffer */ ++ struct kernel_ipmi_msg tx_message; ++ ++ /* ipmi transmited data buffer */ ++ unsigned char tx_msg_data[IPMI_MAX_MSG_LENGTH + 50]; /* why the +50 in bmcsensors? */ ++ ++ /* ipmi recieved data buffer */ ++ unsigned char rx_msg_data[IPMI_MAX_MSG_LENGTH + 50]; ++ ++ /* current recieve buffer offset */ ++ int rx_msg_data_offset; ++ ++ /* The id of then next SDR record to read during update cycle */ ++ u16 nextrecord; ++ ++ /* BMC SDR Reservation ID */ ++ u16 resid; ++ ++ /* Alarm status */ ++ u8 alarms; ++ ++ /* The cumalative error count for this bmc */ ++ int errorcount; ++ ++ /* The current state of this bmc w.r.t. ipmisensors (see enum states) */ ++ int state; ++ ++ /* The current sdr for which a reading is pending */ ++ struct sdrdata *current_sdr; ++ ++ /* The BMC's device struct */ ++ struct device *dev; ++ ++ /* hwmon class device */ ++ struct class_device *class_dev; ++ ++ /* hwmon device name */ ++ struct device_attribute name_attr; ++ ++ /* alarms attribute */ ++ struct ipmisensors_bmc_device_attribute alarms_attr; ++ ++ /* update_period attribute */ ++ struct ipmisensors_bmc_device_attribute update_attr; ++ ++ /* lower bound on time between updates (in seconds) */ ++ unsigned int update_period; ++ ++ /* semaphore used to do a headcount of the SDR readings we are waiting ++ * on in a given bmc update */ ++ struct semaphore update_semaphore; ++ ++ /* bmc's work struct for updating sensors */ ++ struct delayed_work update_work; ++ ++ /* bmc's work struct for building the sysfs workqueue */ ++ struct work_struct sysfs_work; ++}; +diff -rduNp linux-2.6.20.3.orig/include/linux/ipmi.h linux-2.6.20.3/include/linux/ipmi.h +--- linux-2.6.20.3.orig/include/linux/ipmi.h 2007-03-13 19:27:08.000000000 +0100 ++++ linux-2.6.20.3/include/linux/ipmi.h 2007-03-14 14:23:02.000000000 +0100 +@@ -300,6 +300,9 @@ int ipmi_create_user(unsigned int + safe, too. */ + int ipmi_destroy_user(ipmi_user_t user); + ++/* Get the IPMI BMC's device struct */ ++struct device *ipmi_get_bmcdevice(int ipmi_intf); ++ + /* Get the IPMI version of the BMC we are talking to. 
*/
+ void ipmi_get_version(ipmi_user_t user,
+ unsigned char *major,
+diff -rduNp linux-2.6.20.3.orig/include/linux/ipmi_msgdefs.h linux-2.6.20.3/include/linux/ipmi_msgdefs.h
+--- linux-2.6.20.3.orig/include/linux/ipmi_msgdefs.h 2007-03-13 19:27:08.000000000 +0100
++++ linux-2.6.20.3/include/linux/ipmi_msgdefs.h 2007-03-14 14:23:02.000000000 +0100
+@@ -45,6 +45,7 @@
+
+ #define IPMI_NETFN_APP_REQUEST 0x06
+ #define IPMI_NETFN_APP_RESPONSE 0x07
++#define IPMI_GET_DEVICE_GUID_CMD 0x08
+ #define IPMI_GET_DEVICE_ID_CMD 0x01
+ #define IPMI_COLD_RESET_CMD 0x02
+ #define IPMI_WARM_RESET_CMD 0x03
+@@ -57,6 +58,11 @@
+ #define IPMI_GET_BMC_GLOBAL_ENABLES_CMD 0x2f
+ #define IPMI_READ_EVENT_MSG_BUFFER_CMD 0x35
+ #define IPMI_GET_CHANNEL_INFO_CMD 0x42
++#define IPMI_RESERVE_SDR 0x22
++#define IPMI_GET_SDR 0x23
++#define IPMI_GET_SENSOR_STATE_READING 0x2D
++#define IPMI_SET_SENSOR_HYSTERESIS 0x24
++#define IPMI_SET_SENSOR_THRESHOLD 0x26
+
+ #define IPMI_NETFN_STORAGE_REQUEST 0x0a
+ #define IPMI_NETFN_STORAGE_RESPONSE 0x0b
+@@ -79,10 +85,13 @@
+ #define IPMI_NODE_BUSY_ERR 0xc0
+ #define IPMI_INVALID_COMMAND_ERR 0xc1
+ #define IPMI_TIMEOUT_ERR 0xc3
++#define IPMI_INVALID_RESERVATION_ID 0xc5
+ #define IPMI_ERR_MSG_TRUNCATED 0xc6
+ #define IPMI_REQ_LEN_INVALID_ERR 0xc7
+ #define IPMI_REQ_LEN_EXCEEDED_ERR 0xc8
+ #define IPMI_NOT_IN_MY_STATE_ERR 0xd5 /* IPMI 2.0 */
++#define IPMI_ERR_RETURNING_REQ_BYTES 0xca
++#define IPMI_ERR_PROVIDING_RESPONSE 0xce
+ #define IPMI_LOST_ARBITRATION_ERR 0x81
+ #define IPMI_BUS_ERR 0x82
+ #define IPMI_NAK_ON_WRITE_ERR 0x83
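
The SDR parser in this patch stores the conversion constants m, b and k for each full sensor record. Per the IPMI specification, a raw 8-bit reading x becomes a real value via y = (M * x + B * 10^Kb) * 10^Kr, where Kb and Kr are the two signed 4-bit exponents packed into the byte the driver keeps in sdr->k. The stand-alone sketch below only illustrates that formula; it is not part of the patch, the nibble layout of k (Kr in the high nibble, Kb in the low nibble) follows the IPMI spec rather than code shown here, and sdr_convert() with its example values is hypothetical.

/*
 * Hypothetical user-space sketch (not part of the patch): apply the IPMI
 * full-sensor-record conversion y = (M * x + B * 10^Kb) * 10^Kr to a raw
 * reading, using factors shaped like the m, b, k fields of struct sdrdata.
 * Build with:  gcc sdr_convert.c -o sdr_convert -lm
 */
#include <math.h>
#include <stdint.h>
#include <stdio.h>

/* Sign-extend a 4-bit two's-complement field. */
static int sext4(unsigned int v)
{
	return (v & 0x8) ? (int)(v & 0xf) - 16 : (int)(v & 0xf);
}

static double sdr_convert(int16_t m, int16_t b, uint8_t k, uint8_t raw)
{
	int kb = sext4(k & 0x0f);	/* B (offset) exponent, assumed low nibble  */
	int kr = sext4(k >> 4);		/* R (result) exponent, assumed high nibble */

	return ((double)m * raw + (double)b * pow(10.0, kb)) * pow(10.0, kr);
}

int main(void)
{
	/* Made-up example: M = 1, B = 0, K = 0, raw reading 0x2e. */
	printf("converted reading: %.1f\n", sdr_convert(1, 0, 0, 0x2e));
	return 0;
}

With M = 1, B = 0 and K = 0 the raw reading 0x2e prints as 46.0, i.e. the identity conversion; non-trivial factors only scale and offset that result.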