# Source code for wolfhece.ftp.downloader

import ftplib
import os
import re

"""
MIT license: 2017 - Jwely

Example usage:

import ftplib
ftp = ftplib.FTP(mysite, username, password)
download_ftp_tree(ftp, remote_dir, local_dir)

The code above will look for a directory called "remote_dir" on the ftp host, and then duplicate the
directory and its entire contents into the "local_dir".

Note that if wget is an option, I recommend using that instead

"""


[docs] def _is_ftp_dir(ftp_handle, name, guess_by_extension=True): """ simply determines if an item listed on the ftp server is a valid directory or not """ # if the name has a "." in the fourth to last position, its probably a file extension # this is MUCH faster than trying to set every file to a working directory, and will work 99% of time. if guess_by_extension is True: if len(name) >= 4: if name[-4] == '.': return False original_cwd = ftp_handle.pwd() # remember the current working directory try: ftp_handle.cwd(name) # try to set directory to new name ftp_handle.cwd(original_cwd) # set it back to what it was return True except ftplib.error_perm as e: print(e) return False except Exception as e: print(e) return False
def _make_parent_dir(fpath):
    """
    Ensure the parent directory of a filepath exists, creating it when missing.

    :param fpath: path to a file; only its directory component is used
    :return: None. A failure to create the directory is printed, not raised.
    """
    dirname = os.path.dirname(fpath)

    # A bare file name has an empty directory component, meaning "the current
    # directory", which needs no creation. Retrying on "" (or on any error that
    # cannot be fixed by retrying, such as a permission error) used to recurse
    # without end, so the creation is attempted exactly once.
    if not dirname or os.path.exists(dirname):
        return

    try:
        # makedirs builds every missing intermediate level; exist_ok tolerates
        # another process creating the directory in the meantime
        os.makedirs(dirname, exist_ok=True)
    except OSError as e:
        print(e)
    else:
        print("created {0}".format(dirname))
def _download_ftp_file(ftp_handle, name, dest, overwrite):
    """
    Download a single file from an ftp server.

    The data is first written to ``dest + ".part"`` and only moved onto ``dest`` once
    the transfer has finished, so an interrupted transfer neither leaves a truncated
    ``dest`` behind (which a later run would report as "already exists") nor destroys
    a previously downloaded copy when ``overwrite`` is set.

    :param ftp_handle: object exposing ``retrbinary(cmd, callback)`` (e.g. an ``ftplib.FTP``)
    :param name: remote file name, sent to the server as ``RETR <name>``
    :param dest: local path to write to; its parent directory is not created here
    :param overwrite: pass ``True`` to download even when ``dest`` already exists
    :return: None. Errors raised by the transfer propagate to the caller; a
        ``FileNotFoundError`` (e.g. missing parent directory) is reported as "FAILED".
    """
    if os.path.exists(dest) and overwrite is not True:
        print("already exists: {0}".format(dest))
        return

    partial = "{0}.part".format(dest)
    try:
        with open(partial, 'wb') as f:
            ftp_handle.retrbinary("RETR {0}".format(name), f.write)
        os.replace(partial, dest)  # publish the complete file in one step
        print("downloaded: {0}".format(dest))
    except FileNotFoundError:
        print("FAILED: {0}".format(dest))
    finally:
        # After a successful replace the partial file no longer exists; anything
        # still here is the remains of a failed transfer.
        if os.path.exists(partial):
            try:
                os.remove(partial)
            except OSError:
                # best-effort cleanup: do not mask the original error
                pass
def _file_name_match_patern(pattern, name):
    """
    Tell whether a file name passes the optional regex filter.

    :param pattern: regex understood by ``re.match``, or ``None`` to accept every name
    :param name: the file name to test
    :return: ``True`` when no pattern is given or when the pattern matches at the
        start of ``name``; ``False`` otherwise
    """
    # re.match anchors at the beginning of the string only; it yields None on failure
    return pattern is None or re.match(pattern, name) is not None
def _listed_names(listing_lines):
    """ extracts the entry names from the raw lines of an ftp directory listing """
    names = []
    for line in listing_lines:
        # Assumes the server answers in Unix "ls -l" layout (eight metadata columns
        # followed by the name), as the positional lookup here always has.
        # Limiting the split keeps names that contain spaces in one piece.
        fields = line.split(None, 8)
        if len(fields) < 9:
            # summary lines such as "total 12", or blank lines, carry no entry
            continue
        entry = fields[8]
        if line.startswith('l') and ' -> ' in entry:
            # symbolic link shown as "name -> target": keep the link name only
            entry = entry.split(' -> ', 1)[0]
        if entry in ('.', '..'):
            # descending into these would never terminate
            continue
        names.append(entry)
    return names


def _mirror_ftp_dir(ftp_handle, name, overwrite, guess_by_extension, pattern):
    """
    Replicate a directory on an ftp server recursively into the current local directory.

    The function changes into ``name`` on the server, lists it, downloads every file
    whose name passes ``pattern`` and recurses into every sub-directory after creating
    and entering a local directory of the same name. The server-side directory is left
    at ``name`` when the function returns; the local working directory is restored
    after each sub-directory.

    :param ftp_handle: object exposing ``cwd``, ``pwd``, ``dir`` and ``retrbinary``
        (e.g. an authenticated ``ftplib.FTP``)
    :param name: remote directory to enter, relative to the server's current directory
    :param overwrite: forwarded to ``_download_ftp_file``
    :param guess_by_extension: forwarded to ``_is_ftp_dir``
    :param pattern: regex applied to each file name (``None`` downloads everything)
    """
    ftp_handle.cwd(name)

    listing = []
    ftp_handle.dir(listing.append)

    for item in _listed_names(listing):
        if _is_ftp_dir(ftp_handle, item, guess_by_extension):
            original_directory = os.getcwd()  # remember where to come back to
            os.makedirs(item, exist_ok=True)
            os.chdir(item)  # local mirror of the remote sub-directory
            try:
                _mirror_ftp_dir(ftp_handle, item, overwrite, guess_by_extension, pattern)
            finally:
                # restore the local directory even when the recursion fails
                os.chdir(original_directory)
            ftp_handle.cwd('..')
        elif _file_name_match_patern(pattern, item):
            # the filter applies to the file itself, not to the directory being mirrored
            _download_ftp_file(ftp_handle, item, item, overwrite)
        # files that do not match the pattern are quietly skipped
def download_ftp_tree(ftp_handle, path, destination, pattern=None, overwrite=False, guess_by_extension=True):
    """
    Downloads an entire directory tree from an ftp server to the local destination

    The process working directory is switched to ``destination`` for the duration of
    the download and is restored afterwards, including when the download raises.

    :param ftp_handle: an authenticated ftplib.FTP instance
    :param path: the folder on the ftp server to download; leading "/" characters are
        stripped, so it is resolved relative to the server's current directory
    :param destination: the local directory to store the copied folder; it is entered
        with ``os.chdir`` and is not created by this function
    :param pattern: Python regex pattern, only files that match this pattern will be downloaded.
    :param overwrite: set to True to force re-download of all files, even if they appear to exist already
    :param guess_by_extension: It takes a while to explicitly check if every item is a directory or a file.
        If this flag is set to True, it will assume any file ending with a three character extension ".???" is
        a file and not a directory. Set to False if some folders may have a "." in their names -4th position.
    """
    path = path.lstrip("/")
    original_directory = os.getcwd()  # remember working directory before function is executed
    os.chdir(destination)  # change working directory to ftp mirror directory

    try:
        _mirror_ftp_dir(
            ftp_handle,
            path,
            pattern=pattern,
            overwrite=overwrite,
            guess_by_extension=guess_by_extension)
    finally:
        # reset working directory to what it was before function exec, even on error,
        # so a failed download does not leave the caller in another directory
        os.chdir(original_directory)
if __name__ == "__main__":
    # Example usage mirroring all jpg files in an FTP directory tree.
    # The host and directory values below are placeholders to be filled in.
    mysite = "some_ftp_site"
    username = "anonymous"
    password = None
    remote_dir = ""
    local_dir = ""
    # raw string: "\." is a regex escape, not a Python string escape
    pattern = r".*\.jpg$"

    # the context manager closes the connection when the download ends or fails
    with ftplib.FTP(mysite, username, password) as ftp:
        download_ftp_tree(ftp, remote_dir, local_dir, pattern=pattern, overwrite=False, guess_by_extension=True)