Linux-Python-File-Sync

2021-01-02

Python 文件自动同步备份

转载：https://www.52pojie.cn/thread-1337396-1-1.html

需求：平台会把虚拟机备份的文件打包到服务器A，再同步备份到服务器 B（只需要考虑 A 到 B）。

思路：
服务器 A 作为服务端，定时遍历自己的文件目录，把文件目录信息打包成一个校验文件。
服务器 B 作为客户端，下载校验文件，遍历自己的文件目录并与校验文件比对：下载本地缺少的文件，删除服务端已不存在的文件和目录。
通过 http 传输，使用 python 开启一个简单的 http 服务。

生产环境：python3.7.9，两台 CentOS7.9 服务器。

在服务端的备份目录下开启 http 服务：

nohup 配合末尾的 & 让 http 服务在后台运行，退出终端后也不会被中断；否则它会一直占用控制台，没法干其他事情。

cd /H3C_Backup/
nohup python3 -m http.server 8000 &

服务端代码

import os
 
# Backup root on server A: the tree that is indexed below and the directory
# into which the two listing files (content.txt, file.txt) are written.
path = '/H3C_Backup'
 
def func(path):
    """Index the tree rooted at *path*.

    Returns a two-element list ``[directories, files]``:

    * ``directories`` - every directory visited by a top-down walk,
      starting with *path* itself;
    * ``files`` - every file found, as ``<directory>/<file name>``.
    """
    walked = list(os.walk(path, topdown=True))
    folders = [root for root, _subdirs, _names in walked]
    paths = [root + '/' + name for root, _subdirs, names in walked for name in names]
    return [folders, paths]
 
content = func(path)

# Persist both listings inside the backup root, one path per line:
# content.txt holds the directories, file.txt holds the files.
for listing_name, entries in (('content.txt', content[0]), ('file.txt', content[1])):
    with open(path+'/'+listing_name, 'w', encoding='utf-8') as f:
        f.writelines(entry + '\n' for entry in entries)

客户端代码

import os
import time
import shutil
import multiprocess
import requests
 
 
def init(timeout=60):
    """Download the server's two listing files into ``/download``.

    ``file.txt`` (all files) and ``content.txt`` (all directories) are fetched
    from the server and stored under the same names in ``/download``.

    Args:
        timeout: Seconds to wait for the server to connect / send data before
            giving up, passed through to ``requests.get``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
            Without this check an error page would be saved as a listing, and
            the later sync steps delete whatever the listing does not mention.
        requests.RequestException: On connection problems or timeouts.
        OSError: If ``/download`` cannot be created or written.
    """
    sources = (
        ('http://172.172.172.1:8000/file.txt', '/download/file.txt'),
        ('http://172.172.172.1:8000/content.txt', '/download/content.txt'),
    )

    # The original assumed the target directory already existed.
    os.makedirs('/download', exist_ok=True)

    for url, target in sources:
        # The context manager releases the connection even if writing fails.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(target, 'wb') as f:
                for chunk in response.iter_content(chunk_size=4096):
                    f.write(chunk)
 
 
def function(path) :
    """Walk *path* top-down with ``os.walk`` and list what it contains.

    Returns ``[directories, files]``: the first list holds every directory
    visited (beginning with *path*), the second every file as
    ``<directory>/<file name>``.
    """
    found_dirs = []
    found_files = []
    for current, _subdirs, names in os.walk(path, topdown=True):
        found_dirs.append(current)
        found_files.extend(current + '/' + name for name in names)
    return [found_dirs, found_files]
 
 
def check_dir(path, manifest_path='/download/content.txt'):
    """Make the local directory tree under *path* match the server listing.

    Directories named in the listing but missing locally are created;
    local directories that the listing does not name are removed together
    with their contents. Each listed directory is printed as it is read.

    Args:
        path: Local backup root to reconcile.
        manifest_path: Text file with one server directory per line
            (UTF-8). Defaults to the location the listing is downloaded to.

    Raises:
        ValueError: If the listing contains no entries. An empty listing would
            otherwise mark every local directory for deletion.
        OSError: If the listing cannot be read or a directory cannot be
            created.
    """
    # Top-down walk: parents are listed before their children.
    local_dirs = [root for root, _subdirs, _names in os.walk(path, topdown=True)]

    with open(manifest_path, 'r', encoding='utf-8') as manifest:
        stripped = [line.rstrip('\r\n') for line in manifest]
    remote_dirs = [entry for entry in stripped if entry]  # ignore blank lines

    if not remote_dirs:
        raise ValueError('empty directory listing: ' + manifest_path)

    for entry in remote_dirs:
        print(entry)

    # Sets turn the repeated membership tests into O(1) lookups.
    local_set = set(local_dirs)
    remote_set = set(remote_dirs)

    # Create what the server has and we lack. makedirs tolerates duplicate
    # listing lines and a missing parent, both of which broke os.mkdir.
    for remote_dir in remote_dirs:
        if remote_dir not in local_set:
            os.makedirs(remote_dir, exist_ok=True)
            print('创建了' + remote_dir)

    # Remove what the server no longer has. The root itself is never removed.
    for local_dir in local_dirs:
        if local_dir in remote_set or local_dir == path:
            continue
        try:
            shutil.rmtree(local_dir)
        except FileNotFoundError:
            # Already gone: it was inside a parent removed earlier in the loop.
            continue
        except OSError as err:
            # Stay best-effort like the original, but report the failure.
            print('删除失败' + local_dir + ': ' + str(err))
            continue
        print('删除了' + local_dir)
 
 
def download(url, path, timeout=60):
    """Download *url* and store it at *path*.

    The body is streamed into ``<path>.part`` and renamed to *path* only once
    it is complete. The sync decides by file existence alone, so a truncated
    file left at *path* would never be fetched again.

    Args:
        url: Address of the file on the server.
        path: Local destination file; its directory must already exist.
        timeout: Seconds to wait for the server to connect / send data,
            passed through to ``requests.get``.

    Raises:
        requests.HTTPError: If the server answers with an error status (the
            error page is not saved as the file).
        requests.RequestException: On connection problems or timeouts.
        OSError: If the destination cannot be written.
    """
    partial = path + '.part'
    try:
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(partial, 'wb') as f:
                for chunk in response.iter_content(chunk_size=10240):
                    f.write(chunk)
        os.replace(partial, path)
    except Exception:
        # Do not leave a half-written temporary file behind.
        if os.path.exists(partial):
            os.remove(partial)
        raise
    # Report once per file; the original printed this for every chunk.
    print('添加了' + path)
 
 
def check_file(path, manifest_path='/download/file.txt',
               base_url='http://172.172.172.1:8000'):
    """Make the local files under *path* match the server listing.

    Local files that the listing does not name are deleted; files named in
    the listing but missing locally are downloaded by a pool of 10 worker
    processes. Failed downloads are reported, not raised.

    Args:
        path: Local backup root to reconcile.
        manifest_path: Text file with one server file path per line (UTF-8).
        base_url: Scheme, host and port of the server's HTTP service.

    Raises:
        OSError: If the listing cannot be read or a surplus file cannot be
            removed.
    """
    # Imported locally: the file header imports `multiprocess` (a different,
    # third-party package), so the stdlib module used here is not in scope.
    import multiprocessing
    from urllib.parse import quote

    local_files = function(path)[1]

    with open(manifest_path, 'r', encoding='utf-8') as manifest:
        stripped = [line.rstrip('\r\n') for line in manifest]
    remote_files = [entry for entry in stripped if entry]  # ignore blank lines

    # Sets turn the repeated membership tests into O(1) lookups.
    local_set = set(local_files)
    remote_set = set(remote_files)

    # Delete surplus local files.
    for name in local_files:
        if name not in remote_set:
            os.remove(name)
            print('删除了' + name)

    # dict.fromkeys drops duplicate listing lines but keeps their order.
    missing = [name for name in dict.fromkeys(remote_files) if name not in local_set]
    if not missing:
        return

    # The listing holds absolute paths, but the listing files themselves are
    # fetched from the URL root, so the HTTP service is assumed to be rooted at
    # the backup directory. Strip that prefix to get the URL path.
    prefix = path.rstrip('/')
    pool = multiprocessing.Pool(processes=10)
    try:
        pending = []
        for name in missing:
            relative = name[len(prefix):] if name.startswith(prefix + '/') else name
            url = base_url + quote(relative)
            pending.append((name, pool.apply_async(download, (url, name,))))
        pool.close()
        pool.join()
    finally:
        pool.terminate()

    # apply_async hides worker exceptions until get() is called.
    for name, result in pending:
        try:
            result.get()
        except Exception as err:
            print('下载失败' + name + ': ' + str(err))
 
if __name__ == '__main__' :
    # Fetch the server listings first, then reconcile directories before
    # files: check_dir runs ahead of check_file, as in the original order.
    path = '/H3C_Backup'
    init()
    for reconcile in (check_dir, check_file):
        reconcile(path)

该代码还没有做测试,先码着有空再做测试。