diff -Nru whoopsie-daisy-0.1.3/data/apache.conf whoopsie-daisy-0.1.4/data/apache.conf
--- whoopsie-daisy-0.1.3/data/apache.conf	1970-01-01 00:00:00.000000000 +0000
+++ whoopsie-daisy-0.1.4/data/apache.conf	2012-01-20 12:49:36.000000000 +0000
@@ -0,0 +1,18 @@
+<VirtualHost *:80>
+    ServerAdmin webmaster@localhost
+
+    DocumentRoot /var/www
+    WSGIScriptAlias / /var/www/submit.wsgi
+    RewriteEngine on
+    RewriteRule ^/([^/]+)/submit-core/([^/]+)/([^/]+) /submit_core.wsgi?uuid=$1&arch=$2&systemuuid=$3 [L]
+    <Location /submit_core.wsgi>
+        SetHandler wsgi-script
+        # FIXME: This will break core file submissions.
+        # Don't allow requests greater than 10 MB
+        LimitRequestBody 10485760
+    </Location>
+    ErrorLog /var/log/apache2/error.log
+    LogLevel warn
+    CustomLog /var/log/apache2/access.log combined
+</VirtualHost>
+
diff -Nru whoopsie-daisy-0.1.3/debian/changelog whoopsie-daisy-0.1.4/debian/changelog
--- whoopsie-daisy-0.1.3/debian/changelog	2012-01-18 17:58:21.000000000 +0000
+++ whoopsie-daisy-0.1.4/debian/changelog	2012-01-26 12:46:56.000000000 +0000
@@ -1,3 +1,14 @@
+whoopsie-daisy (0.1.4) precise; urgency=low
+
+  * Write the system UUID to the UserOOPS ColumnFamily.
+  * Drop the CAP_FOWNER stuff. As James points out, we can just write a
+    .uploaded file and let cron clean up the mess.
+  * Have the client pass the architecture, rather than have an intermediary
+    processing step in the MQ.
+  * Add retracing support in process_core.py.
+
+ -- Evan Dandrea  Thu, 26 Jan 2012 12:46:54 +0000
+
 whoopsie-daisy (0.1.3) precise; urgency=low
 
   * Drop NetworkManager cflags and libs from Makefile.
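The third changelog item is visible end to end in this diff: whoopsie builds a
submit-core URL with the architecture as an extra path segment, and the
RewriteRule in data/apache.conf unpacks it into query parameters for
submit_core.wsgi. A minimal sketch of the URL shape, assuming a placeholder
base URL (the real crash_db_url is configured elsewhere in whoopsie, not shown
in this diff):

    # Illustrative sketch only: crash_db_url is a made-up placeholder,
    # not the deployed server.
    crash_db_url = 'http://crash-db.example.com'

    def submit_core_url(uuid, arch, sha512_system_uuid):
        # Mirrors upload_core() in src/whoopsie.c and the RewriteRule in
        # data/apache.conf: /<uuid>/submit-core/<arch>/<systemuuid>
        return '%s/%s/submit-core/%s/%s' % (crash_db_url, uuid, arch,
                                            sha512_system_uuid)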
diff -Nru whoopsie-daisy-0.1.3/process_core.py whoopsie-daisy-0.1.4/process_core.py
--- whoopsie-daisy-0.1.3/process_core.py	2012-01-11 16:26:38.000000000 +0000
+++ whoopsie-daisy-0.1.4/process_core.py	2012-01-26 12:33:44.000000000 +0000
@@ -21,45 +21,101 @@
 import atexit
 import os
 from subprocess import Popen, PIPE
+import sys
+import errno
+import apport
+from pycassa.pool import ConnectionPool
+from pycassa.columnfamily import ColumnFamily
+from hashlib import md5
 
-host = '10.55.60.168'
-connection = pika.BlockingConnection(pika.ConnectionParameters(host=host))
+cas_host = '10.55.60.75:9160'
+pool = ConnectionPool('testing', [cas_host])
+oops_fam = ColumnFamily(pool, 'OOPS')
+indexes_fam = ColumnFamily(pool, 'Indexes')
+stack_fam = ColumnFamily(pool, 'Stacktrace')
+
+mq_host = '10.55.60.168'
+# TODO envar or parameters
+sandbox = 'sandbox'
+cache = '/tmp/cache'
+connection = pika.BlockingConnection(pika.ConnectionParameters(host=mq_host))
 atexit.register(connection.close)
 channel = connection.channel()
-for queue in ('process_cores', 'retrace_amd64', 'retrace_i386'):
+for queue in ('retrace_amd64', 'retrace_i386'):
     channel.queue_declare(queue=queue, durable=True)
 
-def callback(ch, method, props, body):
-    path = body
-    if os.path.exists(path):
-        new_path = '%s.core' % path
-        with open(new_path, 'wb') as fp:
-            p1 = Popen(['base64', '-d', path], stdout=PIPE)
-            p2 = Popen(['zcat'], stdin=p1.stdout, stdout=fp)
-            p2.communicate()
-        cmd = ['objdump', '-a', new_path]
-        result = Popen(cmd, stdout=PIPE).communicate()[0]
-        arch = None
-        for line in result.splitlines():
-            if line.endswith('elf64-x86-64'):
-                arch = 'amd64'
-                break
-            elif line.endswith('elf32-i386'):
-                arch = 'i386'
-                break
-        if arch:
-            # Ready to be retraced. Put on a retracing MQ.
-            channel.basic_publish(
-                exchange='', routing_key='retrace_%s' % arch, body=new_path,
-                properties=pika.BasicProperties(delivery_mode=2))
+def callback(ch, method, props, path):
+    print 'Processing', path
+    if not os.path.exists(path):
+        print path, 'does not exist, skipping.'
+        # We've processed this. Delete it off the MQ.
+        ch.basic_ack(delivery_tag=method.delivery_tag)
+        return
+
+    new_path = '%s.core' % path
+    with open(new_path, 'wb') as fp:
+        print 'Decompressing to', new_path
+        p1 = Popen(['base64', '-d', path], stdout=PIPE)
+        p2 = Popen(['zcat'], stdin=p1.stdout, stdout=fp, stderr=PIPE)
+        ret = p2.communicate()
+    if p2.returncode != 0:
+        print >>sys.stderr, 'Error processing %s:\n%s' % (path, ret[1])
+        # We've processed this. Delete it off the MQ.
+        ch.basic_ack(delivery_tag=method.delivery_tag)
+        os.remove(path)
+        os.remove(new_path)
+        return
+
+    report = apport.Report()
+    uuid = path.rsplit('/', 1)[1]
+    # TODO use oops-repository instead
+    col = oops_fam.get(uuid)
+    for k in col:
+        report[k] = col[k]
+
+    report['CoreDump'] = (new_path,)
+    report_path = '%s.crash' % path
+    with open(report_path, 'w') as fp:
+        report.write(fp)
+    print 'Retracing'
+    proc = Popen(['apport-retrace', report_path, '-S', sandbox, '-C',
+                  cache, '-o', '%s.new' % report_path])
+    proc.communicate()
+    # TODO Put failed traces on a failed queue.
+    if proc.returncode == 0:
+        print 'Writing back to Cassandra'
+        report = apport.Report()
+        report.load(open('%s.new' % report_path, 'r'))
+        stacktrace_addr_sig = report['StacktraceAddressSignature']
+        stacktrace = report['Stacktrace']
+        hashed_stack = md5(stacktrace).hexdigest()
+
+        # We want really quick lookups of whether we have a stacktrace
+        # for this signature, so that we can quickly tell the client
+        # whether we need a core dump from it.
+        indexes_fam.insert('stacktrace_hashes_by_signature',
+            {stacktrace_addr_sig : hashed_stack})
+        stack_fam.insert(hashed_stack, {'stacktrace' : stacktrace})
+    else:
+        print 'Could not retrace.'
 
+    # We've processed this. Delete it off the MQ.
     ch.basic_ack(delivery_tag=method.delivery_tag)
-    os.remove(path)
+    for p in (path, new_path, report_path, '%s.new' % report_path):
+        try:
+            os.remove(p)
+        except OSError as e:
+            # ENOENT: already gone, which is fine.
+            if e.errno != errno.ENOENT:
+                raise
+    print 'Done processing', path
 
 channel.basic_qos(prefetch_count=1)
-channel.basic_consume(callback, queue='process_cores')
+p = Popen(['dpkg-architecture', '-qDEB_HOST_ARCH'], stdout=PIPE)
+arch = p.communicate()[0].strip('\n')
 print 'Waiting for messages. ^C to exit.'
+channel.basic_consume(callback, queue='retrace_%s' % arch)
 try:
     channel.start_consuming()
 except KeyboardInterrupt:
diff -Nru whoopsie-daisy-0.1.3/README whoopsie-daisy-0.1.4/README
--- whoopsie-daisy-0.1.3/README	2012-01-13 10:00:37.000000000 +0000
+++ whoopsie-daisy-0.1.4/README	2012-01-20 12:46:10.000000000 +0000
@@ -15,23 +15,8 @@
 Install pycassa on the webserver.
 Run oopsrepository/schema.py to create the default schema.
 Install pika on the webserver for talking to the MQ.
 Install rabbitmq-server on the RabbitMQ server. Make sure this is at least 2.0.
-Install libapache2-mod-wsgi on the webserver, enable mod_rewrite, and configure:
-
-<VirtualHost *:80>
-    ServerAdmin webmaster@localhost
-    DocumentRoot /var/www
-    WSGIScriptAlias / /var/www/submit.wsgi
-    RewriteEngine on
-    RewriteRule ^/([^/]+)/submit-core/([^/]+) /submit_core.wsgi?uuid=$1&systemuuid=$2 [L]
-    <Location /submit_core.wsgi>
-    SetHandler wsgi-script
-    # Don't allow requests greater than 10 MB
-    LimitRequestBody 10485760
-    </Location>
-    ErrorLog /var/log/apache2/error.log
-    LogLevel warn
-    CustomLog /var/log/apache2/access.log combined
-</VirtualHost>
+Install libapache2-mod-wsgi on the webserver, enable mod_rewrite, and configure
+using the sample in data/.
 Run `python process_core.py` on a server with access to the location where the
 core files are written.
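process_core.py above stores each successful retrace twice: the full stack
text under its md5 in the Stacktrace ColumnFamily, and a signature-to-hash
pointer in the single 'stacktrace_hashes_by_signature' row of Indexes. A
hypothetical lookup helper (not part of this package) shows how a frontend
could use that index to decide whether a client still needs to send a core
dump:

    import pycassa
    from pycassa.pool import ConnectionPool
    from pycassa.columnfamily import ColumnFamily

    pool = ConnectionPool('testing', ['10.55.60.75:9160'])
    indexes_fam = ColumnFamily(pool, 'Indexes')

    def have_stacktrace(stacktrace_addr_sig):
        # A single column read; a hit means this signature was already
        # retraced, so no core dump needs to be requested.
        try:
            indexes_fam.get('stacktrace_hashes_by_signature',
                            columns=[stacktrace_addr_sig])
            return True
        except pycassa.NotFoundException:
            return False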
diff -Nru whoopsie-daisy-0.1.3/src/whoopsie.c whoopsie-daisy-0.1.4/src/whoopsie.c
--- whoopsie-daisy-0.1.3/src/whoopsie.c	2012-01-18 16:52:54.000000000 +0000
+++ whoopsie-daisy-0.1.4/src/whoopsie.c	2012-01-20 12:36:22.000000000 +0000
@@ -314,7 +314,7 @@
 }
 
 gboolean
-upload_core (const char* uuid, const char* core_data) {
+upload_core (const char* uuid, const char* arch, const char* core_data) {
     CURL* curl = NULL;
     CURLcode result_code = 0;
     char* response_data = NULL;
@@ -322,8 +322,8 @@
     struct curl_slist* list = NULL;
     char* crash_db_core_url = NULL;
 
-    asprintf (&crash_db_core_url, "%s/%s/submit-core/%s",
-              crash_db_url, uuid, sha512_system_uuid);
+    asprintf (&crash_db_core_url, "%s/%s/submit-core/%s/%s",
+              crash_db_url, uuid, arch, sha512_system_uuid);
 
     /* TODO use CURLOPT_READFUNCTION to transparently compress data with
      * Snappy. */
@@ -366,6 +366,7 @@
     char* response_data = NULL;
     char* command = NULL;
     char* core = NULL;
+    char* arch = NULL;
     bson b[1];
 
     report = parse_report (crash_file);
@@ -386,17 +387,17 @@
         if (command) {
             if (strcmp (command, "CORE") == 0) {
                 core = g_hash_table_lookup (report, "CoreDump");
-                if (core) {
-                    if (!upload_core (response_data, core)) {
-                        /* TODO handle retrying? */
+                arch = g_hash_table_lookup (report, "Architecture");
+                if (core && arch) {
+                    if (!upload_core (response_data, arch, core))
+                        /* We do not retry the upload. Once is a big enough hit
+                         * to their Internet connection, and we can always
+                         * count on the next person in line to send it. */
                         printf ("Upload of the core dump failed.\n");
-                    }
-                } else {
+                } else
                     printf ("Asked for a core dump that we don't have.\n");
-                }
-            } else {
+            } else
                 printf ("Got command: %s\n", command);
-            }
         }
     }
 
@@ -410,25 +411,44 @@
 void
 create_file (const char* upload)
 {
+    /* TODO why are we strdup'ing this, rather than just leaving it as const on
+     * the g_queue? */
     char* upload_file = g_strdup (upload);
     char* crash_file = upload_to_crash_file (upload_file);
-
-    if (g_file_test (crash_file, G_FILE_TEST_EXISTS)) {
-        g_message ("%s exists", crash_file);
-        if (online_state && parse_and_upload_report (crash_file)) {
-            if (g_unlink (upload_file))
-                g_warning ("Unable to remove: %s", upload_file);
-            free (crash_file);
-        } else {
-            g_warning ("Adding to queue: %s", upload_file);
-            g_queue_push_head (report_queue, (gpointer)upload_file);
-        }
+    char* uploaded_file = NULL;
+    int fd = -1;
+    asprintf (&uploaded_file, "%sed", upload_file);
+
+    /* We've already handled this. */
+    if (g_file_test (uploaded_file, G_FILE_TEST_EXISTS))
+        goto out;
+
+    /* Already cleaned up? Nothing more we can do. */
+    if (!g_file_test (crash_file, G_FILE_TEST_EXISTS)) {
+        if ((fd = creat (uploaded_file, 0600)) < 0)
+            g_warning ("Unable to create %s: %s", uploaded_file, strerror (errno));
+        goto out;
+    }
+
+    g_message ("%s exists", crash_file);
+    if (online_state && parse_and_upload_report (crash_file)) {
+        if ((fd = creat (uploaded_file, 0600)) < 0)
+            g_warning ("Unable to create %s: %s", uploaded_file, strerror (errno));
+        goto out;
     } else {
-        /* Already cleaned up? Nothing more we can do. */
-        if (g_unlink (upload_file))
-            g_warning ("Unable to remove: %s", upload_file);
+        g_warning ("Adding to queue: %s", upload_file);
+        g_queue_push_head (report_queue, (gpointer)upload_file);
+        /* Don't free the upload file, as we've pushed it onto the queue */
        free (crash_file);
+        free (uploaded_file);
+        return;
     }
+
+ out:
+    close (fd);
+    free (crash_file);
+    free (uploaded_file);
+    free (upload_file);
 }
 
 gboolean
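The rewritten create_file() above implements the changelog's .uploaded idea:
instead of unlinking files under /var/crash (which needed CAP_FOWNER because
of the directory's sticky bit), whoopsie stamps a zero-length marker next to
the report and leaves deletion to cron. The same protocol as a Python sketch
(helper name hypothetical):

    import os

    def mark_uploaded(upload_file):
        # foo.upload -> foo.uploaded, matching asprintf ("%sed", ...) above.
        uploaded_file = upload_file + 'ed'
        if os.path.exists(uploaded_file):
            return False  # already handled on an earlier pass
        # The equivalent of creat (uploaded_file, 0600).
        fd = os.open(uploaded_file, os.O_WRONLY | os.O_CREAT, 0600)
        os.close(fd)
        return True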
@@ -480,19 +500,27 @@
 process_queue (void)
 {
     g_warning ("Processing queue.");
     GList* list = NULL;
-    char *upload_file, *crash_file = NULL;
+    char *upload_file, *crash_file, *uploaded_file = NULL;
+    int fd = -1;
     list = report_queue->head;
     while (list) {
         GList* next = list->next;
         upload_file = list->data;
         if (g_file_test (upload_file, G_FILE_TEST_EXISTS)) {
             crash_file = upload_to_crash_file (upload_file);
-            if (online_state && parse_and_upload_report (crash_file)) {
-                if (g_unlink (upload_file))
-                    g_warning ("Unable to remove: %s", upload_file);
+            asprintf (&uploaded_file, "%sed", upload_file);
+
+            /* We've already handled this. */
+            if (g_file_test (uploaded_file, G_FILE_TEST_EXISTS))
+                remove_from_report_queue (upload_file);
+
+            else if (online_state && parse_and_upload_report (crash_file)) {
+                if ((fd = creat (uploaded_file, 0600)) < 0)
+                    g_warning ("Unable to create %s: %s", uploaded_file, strerror (errno));
                 remove_from_report_queue (upload_file);
             }
             free (crash_file);
+            free (uploaded_file);
         } else {
             remove_from_report_queue (upload_file);
         }
@@ -506,26 +534,36 @@
 {
     GDir* dir = NULL;
     const gchar *file, *ext = NULL;
-    gchar* upload_file = NULL;
-    char* crash_file = NULL;
+    char *upload_file, *crash_file, *uploaded_file = NULL;
+    int fd = -1;
 
     dir = g_dir_open ("/var/crash", 0, NULL);
     while ((file = g_dir_read_name (dir)) != NULL) {
         upload_file = g_build_filename ("/var/crash", file, NULL);
-        ext = strrchr(upload_file, '.');
+        ext = strrchr (upload_file, '.');
         if (ext && strcmp(++ext, "upload") == 0) {
             crash_file = upload_to_crash_file (upload_file);
-            if (online_state && parse_and_upload_report (crash_file)) {
-                if (g_unlink (upload_file))
-                    g_warning ("Unable to remove: %s", upload_file);
-                free (crash_file);
-                free (upload_file);
+            asprintf (&uploaded_file, "%sed", upload_file);
+
+            /* We've already handled this. */
+            if (g_file_test (uploaded_file, G_FILE_TEST_EXISTS))
+                remove_from_report_queue (upload_file);
+
+            else if (online_state && parse_and_upload_report (crash_file)) {
+                if ((fd = creat (uploaded_file, 0600)) < 0)
+                    g_warning ("Unable to create %s: %s", uploaded_file, strerror (errno));
             } else {
                 g_queue_push_head (report_queue, upload_file);
+                free (uploaded_file);
+                free (crash_file);
+                /* Don't free the upload file, as we've pushed it onto the
+                 * queue */
+                continue;
             }
-        } else {
-            free (upload_file);
+            free (uploaded_file);
+            free (crash_file);
         }
+        free (upload_file);
     }
     g_dir_close (dir);
 }
@@ -649,49 +687,9 @@
 }
 
 static void
-create_namespace (void)
-{
-    /* We're going to override +t globally, so let's play it safe and restrict
-     * ourselves to only being able to write in /var/crash. */
-
-    mkdir ("/var/tmp/whoopsie", 0755);
-
-    /* Set up a private mount namespace. */
-    if (unshare (CLONE_NEWNS) == -1)
-        g_error ("CLONE_NEWNS failed.");
-
-    if (mount ("/", "/var/tmp/whoopsie", NULL, MS_BIND | MS_REC | MS_RDONLY, NULL))
-        g_error ("Could not bind mount /.");
-
-    if (mount ("/var/crash", "/var/tmp/whoopsie/var/crash", NULL, MS_BIND, NULL))
-        g_error ("Could not rw mount /var/crash.");
-
-    if (chroot ("/var/tmp/whoopsie"))
-        g_error ("Could not chroot.");
-
-    if (chdir ("/"))
-        g_error ("Could not chdir to /.");
-
-    /* We don't need to worry about unmounting the above bind mounts, as once
-     * we leave the namespace, they will be released:
-     * http://lxr.linux.no/linux+v3.2.1/fs/namespace.c#L2736 */
-}
-
-static void
 drop_privileges (void)
 {
     struct passwd *pw = NULL;
-    cap_t cap;
-    /* Specify that we want to ignore the directory sticky bit */
-    cap_value_t cap_list[] = {CAP_FOWNER};
-
-    if (!CAP_IS_SUPPORTED (CAP_SETFCAP))
-        g_error ("SETFCAP is not supported.");
-
-
-    /* Ensure that we don't lose the capabilities when we drop privileges */
-    if (prctl (PR_SET_KEEPCAPS, 1) < 0)
-        g_error ("prctl failed.");
 
     if (!(pw = getpwnam (username)))
         g_error ("Failed to find user: %s", username);
@@ -705,23 +703,6 @@
 
     setenv ("USER", username, 1);
     setenv ("USERNAME", username, 1);
-
-    /* Now drop all capabilities but CAP_SETFCAP and CAP_FOWNER */
-    cap = cap_init ();
-    if (cap == NULL)
-        g_error ("cap_get_proc failed.");
-    if (cap_set_flag (cap, CAP_EFFECTIVE, 1, cap_list, CAP_SET) == -1)
-        g_error ("cap_set_flag CAP_EFFECTIVE failed.");
-    if (cap_set_flag (cap, CAP_PERMITTED, 1, cap_list, CAP_SET) == -1)
-        g_error ("cap_set_flag CAP_PERMITTED failed.");
-    if (cap_set_proc (cap) == -1)
-        g_error ("cap_set_proc failed.");
-    cap_free (cap);
-
-    cap_clear (cap);
-    cap = cap_get_proc ();
-    g_warning ("capabilities: %s\n", cap_to_text(cap, NULL));
-    cap_free (cap);
 }
 
 void
@@ -741,6 +722,8 @@
     GNetworkMonitor* nm = NULL;
     GSocketConnectable *addr = NULL;
     addr = g_network_address_parse_uri (crash_db_url, 80, NULL);
+    if (addr == NULL)
+        return;
 
     nm = g_network_monitor_get_default ();
     if (!nm)
@@ -779,7 +762,6 @@
     }
     free (system_uuid);
 
-    create_namespace ();
     drop_privileges ();
     exit_if_already_running ();
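submit_core.wsgi below now publishes each uploaded core onto a
per-architecture durable queue, which a matching process_core.py retracer
consumes. The publish side reduces to this sketch (host and queue naming taken
from the scripts in this diff):

    import pika

    connection = pika.BlockingConnection(
        pika.ConnectionParameters(host='10.55.60.168'))
    channel = connection.channel()

    def publish_core(path, arch):
        queue = 'retrace_%s' % arch
        # A durable queue plus delivery_mode=2 (persistent) keeps the
        # message across a broker restart.
        channel.queue_declare(queue=queue, durable=True)
        channel.basic_publish(
            exchange='', routing_key=queue, body=path,
            properties=pika.BasicProperties(delivery_mode=2))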
diff -Nru whoopsie-daisy-0.1.3/submit_core.wsgi whoopsie-daisy-0.1.4/submit_core.wsgi
--- whoopsie-daisy-0.1.3/submit_core.wsgi	2012-01-11 12:18:04.000000000 +0000
+++ whoopsie-daisy-0.1.4/submit_core.wsgi	2012-01-24 10:35:23.000000000 +0000
@@ -23,24 +23,26 @@
 import atexit
 
 host = '10.55.60.168'
+ostream = 'application/octet-stream'
 connection = pika.BlockingConnection(pika.ConnectionParameters(host=host))
 channel = connection.channel()
-channel.queue_declare(queue='process_cores', durable=True)
 atexit.register(connection.close)
 
 def application(environ, start_response):
     params = parse_qs(environ.get('QUERY_STRING'))
-    if params and 'uuid' in params:
+    uuid = ''
+    if params and 'uuid' in params and 'arch' in params:
         uuid = escape(params['uuid'][0])
-        if environ.has_key('CONTENT_TYPE') and environ['CONTENT_TYPE'] == 'application/octet-stream':
-            path = '/tmp/%s' % uuid
+        arch = escape(params['arch'][0])
+        if environ.has_key('CONTENT_TYPE') and environ['CONTENT_TYPE'] == ostream:
+            path = '/srv/cores/%s' % uuid
+            queue = 'retrace_%s' % arch
             with open (path, 'w') as fp:
                 shutil.copyfileobj(environ['wsgi.input'], fp, 512)
+            channel.queue_declare(queue=queue, durable=True)
             channel.basic_publish(
-                exchange='', routing_key='process_cores', body=path,
+                exchange='', routing_key=queue, body=path,
                 properties=pika.BasicProperties(delivery_mode=2))
-            start_response('200 OK', [])
-            return [uuid]
     start_response('200 OK', [])
-    return ['']
+    return [uuid]
diff -Nru whoopsie-daisy-0.1.3/submit.wsgi whoopsie-daisy-0.1.4/submit.wsgi
--- whoopsie-daisy-0.1.3/submit.wsgi	2012-01-18 17:02:59.000000000 +0000
+++ whoopsie-daisy-0.1.4/submit.wsgi	2012-01-19 12:29:18.000000000 +0000
@@ -60,10 +60,14 @@
     data = None
     if environ.has_key(content_type) and environ[content_type] == ostream:
         data = environ['wsgi.input'].read()
+        user_token = None
+        # / + 128 character system UUID
+        if len(environ['PATH_INFO']) == 129:
+            user_token = environ['PATH_INFO'][1:]
         row_key = str(uuid.uuid1())
         # TODO exceptions
         try:
-            key = oopses.insert_bson(oops_config, row_key, data)
+            key = oopses.insert_bson(oops_config, row_key, data, user_token)
         except bson.errors.InvalidBSON:
             start_response('400 Bad Request', [])
             return []
diff -Nru whoopsie-daisy-0.1.3/tools/purge.py whoopsie-daisy-0.1.4/tools/purge.py
--- whoopsie-daisy-0.1.3/tools/purge.py	1970-01-01 00:00:00.000000000 +0000
+++ whoopsie-daisy-0.1.4/tools/purge.py	2012-01-24 10:45:11.000000000 +0000
@@ -0,0 +1,12 @@
+import pika
+import atexit
+import sys
+
+if len(sys.argv) < 2:
+    print 'usage:', sys.argv[0], '<queue>'
+    sys.exit(1)
+host = '10.55.60.168'
+conn = pika.BlockingConnection(pika.ConnectionParameters(host))
+atexit.register(conn.close)
+channel = conn.channel()
+channel.queue_purge(queue=sys.argv[1])
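tools/purge.py is a convenience for clearing out a queue while testing. For
example, to drop any queued amd64 retraces (queue names as declared by
process_core.py):

    python tools/purge.py retrace_amd64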