# Module metadata: author name and the date the script was written.
# NOTE(review): these look like dunder names (``__author__`` / ``__time__``)
# whose underscores were lost when the file was pasted - confirm before renaming.
author = "stephen"
time = "2018 September 03"
import os
import shutil
import subprocess
import tempfile

import PyPDF2
from PyPDF2 import utils
class remove_water_mark(object):
    """Strip a known watermark form XObject from every PDF in a folder.

    Each regular file found in ``pdf_path`` is renamed to a cleaned-up
    ``.pdf`` name and then handled in one of three ways:

    * it cannot be opened or parsed -> moved to ``del_path``;
    * its second page does not reference the watermark -> copied unchanged
      to ``remove_path`` and the original is moved to ``trash_pdf_path``;
    * otherwise -> a copy with the watermark dropped from its first three
      pages is written to ``remove_path`` and the original is moved to
      ``trash_pdf_path``.
    """

    # Resource-name fragment that identifies the watermark form XObject.
    WATERMARK_MARKER = "FormXob.86cdf15f1994e2f2b7032e461afd4234"

    def __init__(
        self,
        pdf_path="/home/shenjianlin/fake_pdf/",
        remove_path="/home/shenjianlin/fake_remove_path/",
        trash_pdf_path="/home/shenjianlin/rubbish_pdf/",
        del_path="/home/shenjianlin/pdf_file/del_file/",
    ):
        """Remember the working folders.

        Args:
            pdf_path: Folder scanned for input PDFs.
            remove_path: Folder receiving the cleaned (or untouched) copies.
            trash_pdf_path: Folder receiving the processed originals.
            del_path: Folder receiving files that could not be read as PDF.
        """
        self.pdf_path = pdf_path
        self.remove_path = remove_path
        self.trash_pdf_path = trash_pdf_path
        self.del_path = del_path

    def get_pdf(self):
        """Return the names of all entries in ``pdf_path``."""
        return os.listdir(self.pdf_path)

    def read_content(self):
        """Process every regular, non-Python file found in ``pdf_path``.

        Directories and Python scripts are skipped *before* any renaming so
        they are left untouched on disk.
        """
        for entry in self.get_pdf():
            source = os.path.join(self.pdf_path, entry)
            is_script = os.path.splitext(entry)[1].startswith(".py")
            if not os.path.isfile(source) or is_script:
                print()
                continue
            self._process_pdf(self._rename_clean(entry))

    @staticmethod
    def _clean_name(name):
        """Return ``name`` with awkward characters removed or replaced.

        Spaces and colons become ``-``; parentheses and double quotes are
        removed. NOTE(review): the quote literals in the original source
        were garbled; they are assumed to have been the typographic quotes
        U+201C / U+201D. The plain ``"`` is stripped as well to cover both
        readings - confirm against the intended file names.
        """
        for char in ('"', "\u201c", "\u201d", "(", ")"):
            name = name.replace(char, "")
        return name.replace(" ", "-").replace(":", "-")

    def _rename_clean(self, entry):
        """Rename ``entry`` inside ``pdf_path`` to its cleaned ``.pdf`` name.

        Returns the full path of the (possibly renamed) file.
        """
        clean = self._clean_name(entry)
        if not clean.lower().endswith(".pdf"):
            clean += ".pdf"
        source = os.path.join(self.pdf_path, entry)
        target = os.path.join(self.pdf_path, clean)
        if target != source:
            os.rename(source, target)
        return target

    @staticmethod
    def _open_reader(pdf_file):
        """Open ``pdf_file`` and return ``(stream, reader)``.

        The caller owns ``stream`` and must close it. If parsing fails the
        stream is closed here and the exception is re-raised.
        """
        stream = open(pdf_file, "rb")
        try:
            return stream, PyPDF2.PdfFileReader(stream)
        except Exception:
            stream.close()
            raise

    def _decrypt(self, pdf_file, stream, pdf):
        """Decrypt with an empty password, falling back to ``qpdf``.

        Returns the ``(stream, reader)`` pair to keep using; on the qpdf
        path the old stream is closed and the rewritten file is reopened.
        """
        try:
            pdf.decrypt("")
        except Exception:
            # PyPDF2 could not handle this encryption; qpdf is tried below.
            pass
        else:
            print("File decrypted pdf")
            return stream, pdf

        stream.close()
        # A private temp folder avoids clobbering a real ``temp.pdf`` in the
        # working directory; the argument list keeps file names away from
        # any shell.
        tmp_dir = tempfile.mkdtemp()
        try:
            tmp_copy = os.path.join(tmp_dir, "temp.pdf")
            shutil.copy(pdf_file, tmp_copy)
            try:
                subprocess.call(
                    ["qpdf", "--password=", "--decrypt", tmp_copy, pdf_file]
                )
            except OSError as e:
                # qpdf missing or not executable: report it and carry on.
                print(e)
        finally:
            shutil.rmtree(tmp_dir, ignore_errors=True)
        print("File Decrypted (qpdf)")
        return self._open_reader(pdf_file)

    @staticmethod
    def _drop_watermark(page, page_index):
        """Remove the ``/FormXob...`` entry from the page's ``/XObject`` map.

        The page is modified in place. When several names start with
        ``/FormXob`` only the last one encountered is removed.
        """
        if not page.get("/Resources"):
            return
        resources = page["/Resources"]
        if not resources.get("/XObject"):
            return
        xobject = resources["/XObject"]
        form = None
        for item in xobject:
            if item.startswith("/FormXob"):
                form = item
        if form:
            print("remove water mark in page: {}".format(page_index))
            xobject.pop(form)

    def _write_clean_copy(self, pdf, base_name):
        """Write a watermark-free copy of ``pdf`` to ``remove_path``.

        Returns ``False`` without writing anything when the second page
        does not reference the watermark, ``True`` once the copy has been
        written.
        """
        # A fresh writer per input file: sharing one writer across files
        # makes pages of earlier PDFs show up in every later output.
        pdf_output = PyPDF2.PdfFileWriter()
        for i in range(pdf.getNumPages()):
            page = pdf.getPage(i)
            has_mark = self.WATERMARK_MARKER in str(page.get("/Resources"))
            if i == 1 and not has_mark:
                return False
            if i < 3 and has_mark:
                print("")
                self._drop_watermark(page, i)
            pdf_output.addPage(page)

        with open(os.path.join(self.remove_path, base_name), "wb") as outfile:
            try:
                pdf_output.write(outfile)
            except Exception as e:
                # Best effort, as before: report the failure and carry on.
                print(e)
        return True

    def _process_pdf(self, pdf_file):
        """Clean one PDF and move the original to its final folder."""
        base_name = os.path.basename(pdf_file)
        print("%s" % pdf_file)
        try:
            stream, pdf = self._open_reader(pdf_file)
        except Exception as e:
            print(e)
            print("")
            shutil.move(pdf_file, os.path.join(self.del_path, base_name))
            return

        # The input must stay open while the writer emits the output, and
        # is closed before the original file is moved.
        try:
            if pdf.isEncrypted:
                stream, pdf = self._decrypt(pdf_file, stream, pdf)
            cleaned = self._write_clean_copy(pdf, base_name)
        finally:
            stream.close()

        trash_target = os.path.join(self.trash_pdf_path, base_name)
        if cleaned:
            print("watermark is get over")
            print("%s" % pdf_file)
            shutil.move(pdf_file, trash_target)
            print("")
            print("\n")
        else:
            print("")
            print("%s" % base_name)
            shutil.copy(pdf_file, self.remove_path)
            shutil.move(pdf_file, trash_target)
# Run the batch clean-up only when executed as a script, not on import.
if __name__ == "__main__":
    remove_water_mark().read_content()
# Topic description
# I want to carry each file's pages over directly, starting from the first
# page, but when I process the PDFs, the first page of the first PDF gets
# assigned to the first page of the second PDF, and the pages end up mixed up.