Comment 9 for bug 1031954

Revision history for this message
Darrell Bishop (darrellb) wrote :

David,
Here are my results with a hybrid JSON/struct approach included: https://gist.github.com/859ba4995a3df9f45913#file_report_2.7.markdown

As you predicted, the JSON/struct hybrid performs extremely well. Here's the code in case you're having trouble viewing the gist:

def json_hybrid_serialize_ring(ring, filename):
    """Write ``ring`` to ``filename`` as a gzipped JSON/struct hybrid.

    Layout of the uncompressed stream (all integers in network byte order):

        <json_len:uint32><json_text>
        <replica_count:uint16>
        then, once per replica:
        <part_count:uint32><item_size:uint32><raw array bytes>

    ``json_text`` is the JSON encoding of ``[part_shift, devs]``.  The raw
    array bytes are whatever the array's ``tobytes()``/``tostring()``
    returns, i.e. the array's in-memory machine representation.

    :param ring: mapping with the keys ``'part_shift'``, ``'devs'`` (both
        JSON-serializable) and ``'replica2part2dev_id'`` (a sequence of
        ``array.array``-like objects exposing ``itemsize`` and
        ``tobytes()`` or ``tostring()``).
    :param filename: path of the gzip file to create or overwrite.
    """
    # Encode the JSON header before opening the file so that a
    # serialization error does not leave a half-written file behind.
    json_text = json.dumps([ring['part_shift'], ring['devs']])
    if not isinstance(json_text, bytes):
        # Python 3: json.dumps returns text, but the stream is binary.
        json_text = json_text.encode('utf-8')

    replicas = ring['replica2part2dev_id']
    # The context manager closes the file even if a write fails midway.
    with GzipFile(filename, 'wb', compresslevel=GZ_LEVEL) as gz:
        gz.write(struct.pack('!I', len(json_text)))
        gz.write(json_text)
        gz.write(struct.pack('!H', len(replicas)))
        for part2dev_id in replicas:
            try:
                raw = part2dev_id.tobytes()
            except AttributeError:
                # Python 2 arrays only provide tostring().
                raw = part2dev_id.tostring()
            gz.write(struct.pack('!II', len(part2dev_id),
                                 part2dev_id.itemsize))
            gz.write(raw)

def json_hybrid_deserialize_ring(filename):
    """Load a ring from a gzipped JSON/struct hybrid file.

    Expected layout of the uncompressed stream (all integers in network
    byte order):

        <json_len:uint32><json_text>
        <replica_count:uint16>
        then, once per replica:
        <part_count:uint32><part_size:uint32><raw array bytes>

    ``json_text`` decodes to ``[part_shift, devs]`` and ``part_size`` is the
    size in bytes of one array entry.

    :param filename: path of the gzip file to read.
    :returns: dict with the keys ``'part_shift'``, ``'devs'`` and
        ``'replica2part2dev_id'`` (a list of ``array.array``, one per
        replica).
    :raises ValueError: if a replica's ``part_size`` matches no supported
        unsigned array typecode.
    """
    ring_dict = {
        'replica2part2dev_id': [],
    }
    # The context manager ensures the file is closed on error paths too.
    with GzipFile(filename) as gz:
        json_len, = struct.unpack('!I', gz.read(4))
        ring_dict['part_shift'], ring_dict['devs'] = json.loads(
            gz.read(json_len))
        replica_count, = struct.unpack('!H', gz.read(2))
        for _ in range(replica_count):
            part_count, part_size = struct.unpack('!II', gz.read(8))
            # Honor the stored item size instead of assuming 16-bit
            # entries.  'H' is tried first so existing files decode exactly
            # as before.  Signedness is not recorded in the stream, so
            # unsigned typecodes are assumed.
            for typecode in 'HBIL':
                if array.array(typecode).itemsize == part_size:
                    break
            else:
                raise ValueError(
                    'unsupported part_size %d in %r' % (part_size, filename))
            ring_dict['replica2part2dev_id'].append(
                array.array(typecode, gz.read(part_count * part_size)))
    return ring_dict

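The on-disk structure (inside the gzip stream) is:
<json_len><json_text><replica_count>[<part_count><part_size><part_data>]...
where the bracketed group repeats replica_count times and part_size is the size in bytes of each entry in part_data.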