diff -Nru whoopsie-daisy-0.1.3/data/apache.conf whoopsie-daisy-0.1.4/data/apache.conf
--- whoopsie-daisy-0.1.3/data/apache.conf 1970-01-01 00:00:00.000000000 +0000
+++ whoopsie-daisy-0.1.4/data/apache.conf 2012-01-20 12:49:36.000000000 +0000
@@ -0,0 +1,18 @@
+<VirtualHost *:80>
+ ServerAdmin webmaster@localhost
+
+ DocumentRoot /var/www
+ WSGIScriptAlias / /var/www/submit.wsgi
+ RewriteEngine on
+ RewriteRule ^/([^/]+)/submit-core/([^/]+)/([^/]+) /submit_core.wsgi?uuid=$1&arch=$2&systemuuid=$3 [L]
+ <Files submit.wsgi>
+ SetHandler wsgi-script
+ # FIXME: This will break core file submissions.
+ # Don't allow requests greater than 10 MB
+ LimitRequestBody 10485760
+ </Files>
+ ErrorLog /var/log/apache2/error.log
+ LogLevel warn
+ CustomLog /var/log/apache2/access.log combined
+
+</VirtualHost>
diff -Nru whoopsie-daisy-0.1.3/debian/changelog whoopsie-daisy-0.1.4/debian/changelog
--- whoopsie-daisy-0.1.3/debian/changelog 2012-01-18 17:58:21.000000000 +0000
+++ whoopsie-daisy-0.1.4/debian/changelog 2012-01-26 12:46:56.000000000 +0000
@@ -1,3 +1,14 @@
+whoopsie-daisy (0.1.4) precise; urgency=low
+
+ * Write the system UUID to the UserOOPS ColumnFamily.
+ * Drop the CAP_FOWNER stuff. As James points out, we can just write a
+ .uploaded file and let cron clean up the mess.
+ * Have the client pass the architecture, rather than have an intermediary
+ processing step in the MQ.
+ * Add retracing support in process_core.py.
+
+ -- Evan Dandrea <ev@ubuntu.com>  Thu, 26 Jan 2012 12:46:54 +0000
+
whoopsie-daisy (0.1.3) precise; urgency=low
* Drop NetworkManager cflags and libs from Makefile.
diff -Nru whoopsie-daisy-0.1.3/process_core.py whoopsie-daisy-0.1.4/process_core.py
--- whoopsie-daisy-0.1.3/process_core.py 2012-01-11 16:26:38.000000000 +0000
+++ whoopsie-daisy-0.1.4/process_core.py 2012-01-26 12:33:44.000000000 +0000
@@ -21,45 +21,98 @@
import atexit
import os
from subprocess import Popen, PIPE
+import apport
+from pycassa.pool import ConnectionPool
+from pycassa.columnfamily import ColumnFamily
+from hashlib import md5
-host = '10.55.60.168'
-connection = pika.BlockingConnection(pika.ConnectionParameters(host=host))
+cas_host = '10.55.60.75:9160'
+pool = ConnectionPool('testing', [cas_host])
+oops_fam = ColumnFamily(pool, 'OOPS')
+indexes_fam = ColumnFamily(pool, 'Indexes')
+stack_fam = ColumnFamily(pool, 'Stacktrace')
+
+mq_host = '10.55.60.168'
+# TODO envar or parameters
+sandbox = 'sandbox'
+cache = '/tmp/cache'
+connection = pika.BlockingConnection(pika.ConnectionParameters(host=mq_host))
atexit.register(connection.close)
channel = connection.channel()
-for queue in ('process_cores', 'retrace_amd64', 'retrace_i386'):
+for queue in ('retrace_amd64', 'retrace_i386'):
channel.queue_declare(queue=queue, durable=True)
-def callback(ch, method, props, body):
- path = body
- if os.path.exists(path):
- new_path = '%s.core' % path
- with open(new_path, 'wb') as fp:
- p1 = Popen(['base64', '-d', path], stdout=PIPE)
- p2 = Popen(['zcat'], stdin=p1.stdout, stdout=fp)
- p2.communicate()
- cmd = ['objdump', '-a', new_path]
- result = Popen(cmd, stdout=PIPE).communicate()[0]
- arch = None
- for line in result.splitlines():
- if line.endswith('elf64-x86-64'):
- arch = 'amd64'
- break
- elif line.endswith('elf32-i386'):
- arch = 'i386'
- break
- if arch:
- # Ready to be retraced. Put on a retracing MQ.
- channel.basic_publish(
- exchange='', routing_key='retrace_%s' % arch, body=new_path,
- properties=pika.BasicProperties(delivery_mode=2))
+def callback(ch, method, props, path):
+ print 'Processing', path
+ if not os.path.exists(path):
+ print path, 'does not exist, skipping.'
+ # We've processed this. Delete it off the MQ.
+ ch.basic_ack(delivery_tag=method.delivery_tag)
+ os.remove(path)
+
+ new_path = '%s.core' % path
+ with open(new_path, 'wb') as fp:
+ print 'Decompressing to', new_path
+ p1 = Popen(['base64', '-d', path], stdout=PIPE)
+ p2 = Popen(['zcat'], stdin=p1.stdout, stdout=fp)
+ ret = p2.communicate()
+ if p2.returncode != 0:
+ print >>sys.stderr, 'Error processing %s:\n%s' % (path, ret[1])
+ # We've processed this. Delete it off the MQ.
+ ch.basic_ack(delivery_tag=method.delivery_tag)
+ os.remove(path)
+ os.remove(new_path)
+ return
+
+ report = apport.Report()
+ uuid = path.rsplit('/', 1)[1]
+ # TODO use oops-repository instead
+ col = oops_fam.get(uuid)
+ for k in col:
+ report[k] = col[k]
+
+ report['CoreDump'] = (new_path,)
+ report_path = '%s.crash' % path
+ with open(report_path, 'w') as fp:
+ report.write(fp)
+ print 'Retracing'
+ proc = Popen(['apport-retrace', report_path, '-S', sandbox, '-C',
+ cache, '-o', '%s.new' % report_path])
+ proc.communicate()
+ # TODO Put failed traces on a failed queue.
+ if proc.returncode == 0:
+ print 'Writing back to Cassandra'
+ report = apport.Report()
+ report.load(open('%s.new' % report_path, 'r'))
+ stacktrace_addr_sig = report['StacktraceAddressSignature']
+ stacktrace = report['Stacktrace']
+ hashed_stack = md5(stacktrace).hexdigest()
+
+ # We want really quick lookups of whether we have a stacktrace
+ # for this signature, so that we can quickly tell the client
+ # whether we need a core dump from it.
+ indexes_fam.insert('stacktrace_hashes_by_signature',
+ {stacktrace_addr_sig : hashed_stack})
+ stack_fam.insert(hashed_stack, {'stacktrace' : stacktrace})
+ else:
+ print 'Could not retrace.'
+
# We've processed this. Delete it off the MQ.
ch.basic_ack(delivery_tag=method.delivery_tag)
- os.remove(path)
+ for p in (path, new_path, report_path, '%s.new' % report_path):
+ try:
+ os.remove(p)
+ except OSError:
+ if errno != 2:
+ raise
+ print 'Done processing', path
channel.basic_qos(prefetch_count=1)
-channel.basic_consume(callback, queue='process_cores')
+p = Popen(['dpkg-architecture', '-qDEB_HOST_ARCH'], stdout=PIPE)
+arch = p.communicate()[0].strip('\n')
print 'Waiting for messages. ^C to exit.'
+channel.basic_consume(callback, queue='retrace_%s' % arch)
try:
channel.start_consuming()
except KeyboardInterrupt:
diff -Nru whoopsie-daisy-0.1.3/README whoopsie-daisy-0.1.4/README
--- whoopsie-daisy-0.1.3/README 2012-01-13 10:00:37.000000000 +0000
+++ whoopsie-daisy-0.1.4/README 2012-01-20 12:46:10.000000000 +0000
@@ -15,23 +15,8 @@
Install pycassa on the webserver. Run oopsrepository/schema.py to create the default schema.
Install pika on the webserver for talking to the MQ.
Install rabbitmq-server on the RabbitMQ server. Make sure this is at least 2.0.
-Install libapache2-mod-wsgi on the webserver, enable mod_rewrite, and configure:
-
-<VirtualHost *:80>
- ServerAdmin webmaster@localhost
- DocumentRoot /var/www
- WSGIScriptAlias / /var/www/submit.wsgi
- RewriteEngine on
- RewriteRule ^/([^/]+)/submit-core/([^/]+) /submit_core.wsgi?uuid=$1&systemuuid=$2 [L]
- <Files submit.wsgi>
- SetHandler wsgi-script
- # Don't allow requests greater than 10 MB
- LimitRequestBody 10485760
- </Files>
- ErrorLog /var/log/apache2/error.log
- LogLevel warn
- CustomLog /var/log/apache2/access.log combined
-</VirtualHost>
+Install libapache2-mod-wsgi on the webserver, enable mod_rewrite, and configure
+using the sample in data/.
Run `python process_core.py` on a server with access to the location where the
core files are written.
diff -Nru whoopsie-daisy-0.1.3/src/whoopsie.c whoopsie-daisy-0.1.4/src/whoopsie.c
--- whoopsie-daisy-0.1.3/src/whoopsie.c 2012-01-18 16:52:54.000000000 +0000
+++ whoopsie-daisy-0.1.4/src/whoopsie.c 2012-01-20 12:36:22.000000000 +0000
@@ -314,7 +314,7 @@
}
gboolean
-upload_core (const char* uuid, const char* core_data) {
+upload_core (const char* uuid, const char* arch, const char* core_data) {
CURL* curl = NULL;
CURLcode result_code = 0;
char* response_data = NULL;
@@ -322,8 +322,8 @@
struct curl_slist* list = NULL;
char* crash_db_core_url = NULL;
- asprintf (&crash_db_core_url, "%s/%s/submit-core/%s",
- crash_db_url, uuid, sha512_system_uuid);
+ asprintf (&crash_db_core_url, "%s/%s/submit-core/%s/%s",
+ crash_db_url, uuid, arch, sha512_system_uuid);
/* TODO use CURLOPT_READFUNCTION to transparently compress data with
* Snappy. */
@@ -366,6 +366,7 @@
char* response_data = NULL;
char* command = NULL;
char* core = NULL;
+ char* arch = NULL;
bson b[1];
report = parse_report (crash_file);
@@ -386,17 +387,17 @@
if (command) {
if (strcmp (command, "CORE") == 0) {
core = g_hash_table_lookup (report, "CoreDump");
- if (core) {
- if (!upload_core (response_data, core)) {
- /* TODO handle retrying? */
+ arch = g_hash_table_lookup (report, "Architecture");
+ if (core && arch) {
+ if (!upload_core (response_data, arch, core))
+ /* We do not retry the upload. Once is a big enough hit
+ * to their Internet connection, and we can always
+ * count on the next person in line to send it. */
printf ("Upload of the core dump failed.\n");
- }
- } else {
+ } else
printf ("Asked for a core dump that we don't have.\n");
- }
- } else {
+ } else
printf ("Got command: %s\n", command);
- }
}
}
@@ -410,25 +411,44 @@
void
create_file (const char* upload)
{
+ /* TODO why are we strdup'ing this, rather than just leaving it as const on
+ * the g_queue? */
char* upload_file = g_strdup (upload);
char* crash_file = upload_to_crash_file (upload_file);
-
- if (g_file_test (crash_file, G_FILE_TEST_EXISTS)) {
- g_message ("%s exists", crash_file);
- if (online_state && parse_and_upload_report (crash_file)) {
- if (g_unlink (upload_file))
- g_warning ("Unable to remove: %s", upload_file);
- free (crash_file);
- } else {
- g_warning ("Adding to queue: %s", upload_file);
- g_queue_push_head (report_queue, (gpointer)upload_file);
- }
+ char* uploaded_file = NULL;
+ int fd = -1;
+ asprintf (&uploaded_file, "%sed", upload_file);
+
+ /* We've already handled this. */
+ if (g_file_test (uploaded_file, G_FILE_TEST_EXISTS))
+ goto out;
+
+ /* Already cleaned up? Nothing more we can do. */
+ if (!g_file_test (crash_file, G_FILE_TEST_EXISTS)) {
+ if ((fd = creat (uploaded_file, 0600)) < 0)
+ g_warning ("Unable to create %s: %s", uploaded_file, strerror (errno));
+ goto out;
+ }
+
+ g_message ("%s exists", crash_file);
+ if (online_state && parse_and_upload_report (crash_file)) {
+ if ((fd = creat (uploaded_file, 0600)) < 0)
+ g_warning ("Unable to create %s: %s", uploaded_file, strerror (errno));
+ goto out;
} else {
- /* Already cleaned up? Nothing more we can do. */
- if (g_unlink (upload_file))
- g_warning ("Unable to remove: %s", upload_file);
+ g_warning ("Adding to queue: %s", upload_file);
+ g_queue_push_head (report_queue, (gpointer)upload_file);
+ /* Don't free the upload file, as we've pushed it onto the queue */
free (crash_file);
+ free (uploaded_file);
+ return;
}
+
+ out:
+ close (fd);
+ free (crash_file);
+ free (uploaded_file);
+ free (upload_file);
}
gboolean
@@ -480,19 +500,27 @@
process_queue (void) {
g_warning ("Processing queue.");
GList* list = NULL;
- char *upload_file, *crash_file = NULL;
+ char *upload_file, *crash_file, *uploaded_file = NULL;
+ int fd = -1;
list = report_queue->head;
while (list) {
GList* next = list->next;
upload_file = list->data;
if (g_file_test (upload_file, G_FILE_TEST_EXISTS)) {
crash_file = upload_to_crash_file (upload_file);
- if (online_state && parse_and_upload_report (crash_file)) {
- if (g_unlink (upload_file))
- g_warning ("Unable to remove: %s", upload_file);
+ asprintf (&uploaded_file, "%sed", upload_file);
+
+ /* We've already handled this. */
+ if (g_file_test (uploaded_file, G_FILE_TEST_EXISTS))
+ remove_from_report_queue (upload_file);
+
+ else if (online_state && parse_and_upload_report (crash_file)) {
+ if ((fd = creat (uploaded_file, 0600)) < 0)
+ g_warning ("Unable to create %s: %s", uploaded_file, strerror (errno));
remove_from_report_queue (upload_file);
}
free (crash_file);
+ free (uploaded_file);
} else {
remove_from_report_queue (upload_file);
}
@@ -506,26 +534,36 @@
{
GDir* dir = NULL;
const gchar *file, *ext = NULL;
- gchar* upload_file = NULL;
- char* crash_file = NULL;
+ char *upload_file, *crash_file, *uploaded_file = NULL;
+ int fd = -1;
dir = g_dir_open ("/var/crash", 0, NULL);
while ((file = g_dir_read_name (dir)) != NULL) {
upload_file = g_build_filename ("/var/crash", file, NULL);
- ext = strrchr(upload_file, '.');
+ ext = strrchr (upload_file, '.');
if (ext && strcmp(++ext, "upload") == 0) {
crash_file = upload_to_crash_file (upload_file);
- if (online_state && parse_and_upload_report (crash_file)) {
- if (g_unlink (upload_file))
- g_warning ("Unable to remove: %s", upload_file);
- free (crash_file);
- free (upload_file);
+ asprintf (&uploaded_file, "%sed", upload_file);
+
+ /* We've already handled this. */
+ if (g_file_test (uploaded_file, G_FILE_TEST_EXISTS))
+ remove_from_report_queue (upload_file);
+
+ else if (online_state && parse_and_upload_report (crash_file)) {
+ if ((fd = creat (uploaded_file, 0600)) < 0)
+ g_warning ("Unable to create %s: %s", uploaded_file, strerror (errno));
} else {
g_queue_push_head (report_queue, upload_file);
+ free (uploaded_file);
+ free (crash_file);
+ /* Don't free the upload file, as we've pushed it onto the
+ * queue */
+ continue;
}
- } else {
- free (upload_file);
+ free (uploaded_file);
+ free (crash_file);
}
+ free (upload_file);
}
g_dir_close (dir);
}
@@ -649,49 +687,9 @@
}
static void
-create_namespace (void)
-{
- /* We're going to override +t globally, so let's play it safe and restrict
- * ourselves to only being able to write in /var/crash. */
-
- mkdir ("/var/tmp/whoopsie", 0755);
-
- /* Set up a private mount namespace. */
- if (unshare (CLONE_NEWNS) == -1)
- g_error ("CLONE_NEWNS failed.");
-
- if (mount ("/", "/var/tmp/whoopsie", NULL, MS_BIND | MS_REC | MS_RDONLY, NULL))
- g_error ("Could not bind mount /.");
-
- if (mount ("/var/crash", "/var/tmp/whoopsie/var/crash", NULL, MS_BIND, NULL))
- g_error ("Could not rw mount /var/crash.");
-
- if (chroot ("/var/tmp/whoopsie"))
- g_error ("Could not chroot.");
-
- if (chdir ("/"))
- g_error ("Could not chdir to /.");
-
- /* We don't need to worry about unmounting the above bind mounts, as once
- * we leave the namespace, they will be released:
- * http://lxr.linux.no/linux+v3.2.1/fs/namespace.c#L2736 */
-}
-
-static void
drop_privileges (void)
{
struct passwd *pw = NULL;
- cap_t cap;
- /* Specify that we want to ignore the directory sticky bit */
- cap_value_t cap_list[] = {CAP_FOWNER};
-
- if (!CAP_IS_SUPPORTED (CAP_SETFCAP))
- g_error ("SETFCAP is not supported.");
-
-
- /* Ensure that we don't lose the capabilities when we drop privileges */
- if (prctl (PR_SET_KEEPCAPS, 1) < 0)
- g_error ("prctl failed.");
if (!(pw = getpwnam (username)))
g_error ("Failed to find user: %s", username);
@@ -705,23 +703,6 @@
setenv ("USER", username, 1);
setenv ("USERNAME", username, 1);
-
- /* Now drop all capabilities but CAP_SETFCAP and CAP_FOWNER */
- cap = cap_init ();
- if (cap == NULL)
- g_error ("cap_get_proc failed.");
- if (cap_set_flag (cap, CAP_EFFECTIVE, 1, cap_list, CAP_SET) == -1)
- g_error ("cap_set_flag CAP_EFFECTIVE failed.");
- if (cap_set_flag (cap, CAP_PERMITTED, 1, cap_list, CAP_SET) == -1)
- g_error ("cap_set_flag CAP_PERMITTED failed.");
- if (cap_set_proc (cap) == -1)
- g_error ("cap_set_proc failed.");
- cap_free (cap);
-
- cap_clear (cap);
- cap = cap_get_proc ();
- g_warning ("capabilities: %s\n", cap_to_text(cap, NULL));
- cap_free (cap);
}
void
@@ -741,6 +722,8 @@
GNetworkMonitor* nm = NULL;
GSocketConnectable *addr = NULL;
addr = g_network_address_parse_uri (crash_db_url, 80, NULL);
+ if (addr == NULL)
+ return;
nm = g_network_monitor_get_default ();
if (!nm)
@@ -779,7 +762,6 @@
}
free (system_uuid);
- create_namespace ();
drop_privileges ();
exit_if_already_running ();
diff -Nru whoopsie-daisy-0.1.3/submit_core.wsgi whoopsie-daisy-0.1.4/submit_core.wsgi
--- whoopsie-daisy-0.1.3/submit_core.wsgi 2012-01-11 12:18:04.000000000 +0000
+++ whoopsie-daisy-0.1.4/submit_core.wsgi 2012-01-24 10:35:23.000000000 +0000
@@ -23,24 +23,26 @@
import atexit
host = '10.55.60.168'
+ostream = 'application/octet-stream'
connection = pika.BlockingConnection(pika.ConnectionParameters(host=host))
channel = connection.channel()
-channel.queue_declare(queue='process_cores', durable=True)
atexit.register(connection.close)
def application(environ, start_response):
params = parse_qs(environ.get('QUERY_STRING'))
- if params and 'uuid' in params:
+ uuid = ''
+ if params and 'uuid' in params and 'arch' in params:
uuid = escape(params['uuid'][0])
- if environ.has_key('CONTENT_TYPE') and environ['CONTENT_TYPE'] == 'application/octet-stream':
- path = '/tmp/%s' % uuid
+ arch = escape(params['arch'][0])
+ if environ.has_key('CONTENT_TYPE') and environ['CONTENT_TYPE'] == ostream:
+ path = '/srv/cores/%s' % uuid
+ queue = 'retrace_%s' % arch
with open (path, 'w') as fp:
shutil.copyfileobj(environ['wsgi.input'], fp, 512)
+ channel.queue_declare(queue=queue, durable=True)
channel.basic_publish(
- exchange='', routing_key='process_cores', body=path,
+ exchange='', routing_key=queue, body=path,
properties=pika.BasicProperties(delivery_mode=2))
- start_response('200 OK', [])
- return [uuid]
start_response('200 OK', [])
- return ['']
+ return [uuid]
diff -Nru whoopsie-daisy-0.1.3/submit.wsgi whoopsie-daisy-0.1.4/submit.wsgi
--- whoopsie-daisy-0.1.3/submit.wsgi 2012-01-18 17:02:59.000000000 +0000
+++ whoopsie-daisy-0.1.4/submit.wsgi 2012-01-19 12:29:18.000000000 +0000
@@ -60,10 +60,14 @@
data = None
if environ.has_key(content_type) and environ[content_type] == ostream:
data = environ['wsgi.input'].read()
+ user_token = None
+ # / + 128 character system UUID
+ if len(environ['PATH_INFO']) == 129:
+ user_token = environ['PATH_INFO'][1:]
row_key = str(uuid.uuid1())
# TODO exceptions
try:
- key = oopses.insert_bson(oops_config, row_key, data)
+ key = oopses.insert_bson(oops_config, row_key, data, user_token)
except bson.errors.InvalidBSON:
start_response('400 Bad Request', [])
return []
diff -Nru whoopsie-daisy-0.1.3/tools/purge.py whoopsie-daisy-0.1.4/tools/purge.py
--- whoopsie-daisy-0.1.3/tools/purge.py 1970-01-01 00:00:00.000000000 +0000
+++ whoopsie-daisy-0.1.4/tools/purge.py 2012-01-24 10:45:11.000000000 +0000
@@ -0,0 +1,12 @@
+import pika
+import atexit
+import sys
+
+if len(sys.argv) < 2:
+ print 'usage:', sys.argv[0], '<queue>'
+ sys.exit(1)
+host = '10.55.60.168'
+conn = pika.BlockingConnection(pika.ConnectionParameters(host))
+atexit.register(conn.close)
+channel = conn.channel()
+channel.queue_purge(queue=sys.argv[1])