import tempfile import quopri import base64 import uu import types from email import Message def _copydecoder(fin, fout): while 1: buf = fin.read(8192) if not buf: break fout.write(buf) class DiskMessage(Message.Message): """ This class modifies the standard email.Message class so that message payloads are stored on disk in temporary files rather than in memory. This allows handling of messages that are bigger than available memory. The existing Message class API is not broken, only extended. Body decoding is on-demand and cached to avoid needless decoding. New methods: newtempfile() get_payloadf() feed_payload() To take full advantage of this class and avoid loading entire attachments into memory modified Parser and Generator classes should be used. These should use the new get_payloadf() and feed_payload() methods. See DiskFeedParser. Some care must be taken by the caller as the internal file handles that hold the message payloads are exposed to the outside. The state of these handles may change during calls to class instances. Callers must be aware that this class may raise IOErrors. """ def __init__(self): Message.Message.__init__(self) self._dc_payload = None # File pointer to decoded payload def newtempfile(self): """Return a new file handle to be used for message body storage. Subclasses may wish to override this to modify the temporary file creation scheme. """ return tempfile.TemporaryFile() def add_payload(self, payload): raise NotImplementedError def _init_decoded(self, force=0): """Create/update the decoded payload file """ if self.is_multipart(): return if self._dc_payload and not force: return self._dc_payload = self.newtempfile() cte = self.get('content-transfer-encoding', '').lower() if cte == 'quoted-printable': decoder = quopri.decode elif cte == 'base64': decoder = base64.decode elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): decoder = uu.decode else: decoder = _copydecoder self._payload.seek(0, 0) decoder(self._payload, self._dc_payload) def clear_decoded(self): """Reset the cached decoded message body. This will force a fresh decoded on the next get_payload(decode=True) or get_payloadf(decode=True). """ self._dc_payload = None def get_payload(self, i=None, decode=False): """Same API as standard Message class. This one just reads the contents of the payload files into a string and returns that. """ if i is None: if self.is_multipart(): if decode: return None else: return self._payload else: if decode: self._init_decoded() retf = self._dc_payload else: retf = self._payload retf.seek(0, 0) return retf.read() elif not isinstance(self._payload, list): raise TypeError, 'Expected list, got %s' % type(self._payload) else: return self._payload[i] def get_payloadf(self, decode=False): """Return a file pointer corresponding to the message payload Returns None for multipart sections. """ if self.is_multipart(): return None if decode: self._init_decoded() retf = self._dc_payload else: retf = self._payload retf.seek(0, 0) # Rewind to the start of the file return retf def set_payload(self, payload, charset=None): """Set the payload to the given value. may be a string or a file handle. Optional charset sets the message's default character set. See set_charset() for details. """ if type(payload) == types.StringType: self._payload = self.newtempfile() self._payload.write(payload) else: self._payload = payload self.clear_decoded() if charset is not None: self.set_charset(charset) def feed_payload(self, buf): """Add data to the message payload """ if not self._payload: self._payload = self.newtempfile() self._payload.write(buf) self.clear_decoded()