{"id":2874,"date":"2026-06-10T11:04:35","date_gmt":"2026-06-10T03:04:35","guid":{"rendered":"https:\/\/thereisno.top\/?p=2874"},"modified":"2026-06-10T11:04:35","modified_gmt":"2026-06-10T03:04:35","slug":"python-%e8%87%aa%e5%8a%a8%e5%8c%96pdf%e6%95%b0%e5%ad%97%e8%af%81%e4%b9%a6%e7%ad%be%e5%90%8d","status":"publish","type":"post","link":"https:\/\/thereisno.top\/?p=2874","title":{"rendered":"python \u81ea\u52a8\u5316pdf\u6570\u5b57\u8bc1\u4e66\u7b7e\u540d"},"content":{"rendered":"\n<h2 class=\"wp-block-heading\">\u80cc\u666f<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">\u4e4b\u524d\u4ecb\u7ecd\u8fc7<a href=\"https:\/\/thereisno.top\/?p=2854\">\u5c0f\u5fae\u4f01\u4e1a\u7535\u5b50\u5408\u540c\u9632\u7be1\u6539\u5b9e\u73b0<\/a>\uff0c\u624b\u52a8\u64cd\u4f5c\u6709\u4e9b\u7e41\u7410\uff0c\u80fd\u5426\u505a\u6210\u81ea\u52a8\u5316\u811a\u672c\uff1f<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u529f\u80fd<\/h2>\n\n\n\n<ol class=\"wp-block-list\">\n<li>pdf \u5bfc\u51fajpg<\/li>\n\n\n\n<li>jpg \u8f6cpdf<\/li>\n\n\n\n<li>\u6240\u6709\u8005\u6743\u9650\u63a7\u5236<\/li>\n\n\n\n<li>\u6570\u5b57\u7b7e\u540d<\/li>\n<\/ol>\n\n\n\n<h2 class=\"wp-block-heading\">\u5b9e\u73b0<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">\u4e0a\u97624\u4e2a\u529f\u80fd\uff0c\u6bcf\u4e2a\u529f\u80fd\u7684\u5b9e\u73b0\u53ef\u4ee5\u6709\u591a\u79cd\u5de5\u5177\uff0c\u4f46\u662f\u80fd\u591f\u5168\u90e8\u5b9e\u73b0\u7684\u5374\u6ca1\u627e\u5230\u3002\u53ea\u80fd\u7ed3\u5408\u591a\u4e2a\u5de5\u5177\u6765\u5b9e\u73b0\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">java \u5b9e\u73b0<\/h3>\n\n\n\n<blockquote class=\"wp-block-quote is-layout-flow wp-block-quote-is-layout-flow\">\n<p class=\"wp-block-paragraph\">java \u5e73\u53f0\u53ea\u80fd\u5b9e\u73b0 1 2 3\/4 \u529f\u80fd\uff0c3\u548c4\u53ea\u80fd\u9009\u62e9\u4e00\u4e2a<\/p>\n<\/blockquote>\n\n\n\n<!--more-->\n\n\n\n<pre class=\"wp-block-preformatted\"># \u57fa\u7840\u8def\u5f84\u4e0e\u5bc6\u7801\u914d\u7f6e\nROOT_DIR=\"\/Users\/Downloads\"\nPDF_ROOT_DIR=\"${ROOT_DIR}\/workspace\"\nOUTPUT_DIR=\"${PDF_ROOT_DIR}\/signed_restricted_pdfs\"\nCERT_PATH=\"${PDF_ROOT_DIR}\/test.p12\"    # \u66ff\u6362\u4e3a\u4f60\u7684\u8bc1\u4e66\u5b9e\u9645\u8def\u5f84\nCERT_PASS=\"12345678\"                    # \u66ff\u6362\u4e3a\u8bc1\u4e66\u79c1\u94a5\u5bc6\u7801\nPERMISSION_PASS=\"owner_password\"        # \u66ff\u6362\u4e3aPDF\u7684\u6743\u9650\uff08\u7f16\u8f91\uff09\u5bc6\u7801\n\n# \u521b\u5efa\u8f93\u51fa\u76ee\u5f55\nmkdir -p \"$OUTPUT_DIR\"\n\n# 1. \u8f93\u51fa\u56fe\u7247\njava -jar ${ROOT_DIR}\/pdfbox-app-3.0.7.jar render -format=jpg -dpi=200  -i=input.pdf\n# 2. \u751f\u6210pdf\njava -jar ${ROOT_DIR}\/pdfbox-app-3.0.7.jar fromimage  -pageSize=A4 -autoOrientation -resize -o=output1.pdf -i=input-1.jpg\n\n# 3. \u52a0\u5bc6\njava -jar ${ROOT_DIR}\/pdfbox-app-3.0.7.jar encrypt \\\n  -O=\"$PERMISSION_PASS\" \\\n  -U=\"\" \\\n  -keyLength=256 \\\n  -canPrint=true \\\n  -canModify=false \\\n  -canExtractContent=false \\\n  -canAssemble=false \\\n  -canExtractForAccessibility=false \\\n  -canFillInForm=false \\\n  -canModifyAnnotations=false \\\n  -o=output2.pdf \\\n  -i=output1.pdf\n# 4. \u8bc1\u4e66\u7b7e\u540d\njava -jar ${ROOT_DIR}\/jsignpdf-3.1.0-BETA-3min\/lib\/jsignpdf-bootstrap-3.1.0-3.jar -kst PKCS12 \\\n         -ksf \"$CERT_PATH\" \\\n         -ksp \"$CERT_PASS\" \\\n         -opwd \"$PERMISSION_PASS\" \\\n         output2.pdf\n\n# 5. \u5904\u7406\u5b8c\u540e\uff0c\u5220\u9664\u6389\u4e34\u65f6\u6587\u4ef6\nrm  -rf input*.jpg\nrm  -rf output*.pdf\n<\/pre>\n\n\n\n<h3 class=\"wp-block-heading\">python \u5b9e\u73b0<\/h3>\n\n\n\n<blockquote class=\"wp-block-quote is-layout-flow wp-block-quote-is-layout-flow\">\n<p class=\"wp-block-paragraph\">python \u5e73\u53f0\u5168\u90e8\u5b9e\u73b0 1 2 3 4 \u529f\u80fd<\/p>\n<\/blockquote>\n\n\n\n<pre class=\"wp-block-preformatted\">import glob\nimport pymupdf as fitz\nimport sys\nimport os\nfrom pyhanko.sign import signers,PdfSignatureMetadata\nfrom pyhanko.pdf_utils.incremental_writer import IncrementalPdfFileWriter\nfrom pyhanko.pdf_utils.reader import PdfFileReader\nfrom pyhanko.sign.fields import MDPPerm,SigSeedSubFilter\n\nimport argparse\nparser = argparse.ArgumentParser(description='location the pdf file')\nparser.add_argument(\"-f\",\"--pdf\", type=str, required=True, help=\"the path of pdf file\")\nargs = parser.parse_args()\n\npdf_file_path=args.pdf\n\nimgs_dir=\"\"\nfile_name=\"\"\n# \u57fa\u7840\u8def\u5f84\u4e0e\u5bc6\u7801\u914d\u7f6e\nROOT_DIR=\"\/Users\/Downloads\"\nPDF_ROOT_DIR=ROOT_DIR+\"\/workspace\"\nCERT_PATH=PDF_ROOT_DIR+\"\/test.pfx\"   # \u66ff\u6362\u4e3a\u4f60\u7684\u8bc1\u4e66\u5b9e\u9645\u8def\u5f84\nCERT_PASS=b'12345678'               # \u66ff\u6362\u4e3a\u8bc1\u4e66\u79c1\u94a5\u5bc6\u7801\nPERMISSION_PASS=\"OWNERPWD\"          # \u66ff\u6362\u4e3aPDF\u7684\u6743\u9650\uff08\u7f16\u8f91\uff09\u5bc6\u7801\n\ndef pdf_to_jpg(pdf_path):\n    if not os.path.exists(pdf_path):\n        print(f\"\u627e\u4e0d\u5230\u6587\u4ef6: {pdf_path}\")\n        return\n\n    # \u5728\u540c\u7ea7\u76ee\u5f55\u4e0b\u521b\u5efa\u4e00\u4e2a\u4ee5 PDF \u540d\u5b57\u547d\u540d\u7684\u6587\u4ef6\u5939\u5b58\u653e\u56fe\u7247\n    base_name = os.path.splitext(os.path.basename(pdf_path))\n    output_dir = os.path.join(os.path.dirname(pdf_path), f\"{base_name[0]}_images\")\n    print(f\"\u5df2\u521b\u5efa\u76ee\u5f55: {output_dir}\")\n    os.makedirs(output_dir, exist_ok=True)\n    global imgs_dir\n    imgs_dir=output_dir\n    global file_name\n    file_name=base_name[0]\n\n    doc = fitz.open(pdf_path)\n    zoom = 2.0  # 2\u500d\u7f29\u653e\uff0c\u4fdd\u8bc1\u6e05\u6670\u5ea6\n    mat = fitz.Matrix(zoom, zoom)\n\n    for i, page in enumerate(doc):\n        pix = page.get_pixmap(matrix=mat)\n        img_path = os.path.join(output_dir, f\"page_{i+1}.jpg\")\n        pix.save(img_path, jpg_quality=90)\n        print(f\"\u5df2\u5bfc\u51fa: {img_path}\")\n    \n    doc.close()\n    print(\"\u2705 PDF\u5bfc\u51fa\u56fe\u7247\u5168\u90e8\u5bfc\u51fa\u5b8c\u6210\uff01\")\n\n\ndef images_to_pdf(image_path, output_pdf_path=\"img2pdfout.pdf\"):\n    doc = fitz.open()\n    image_paths=os.listdir(image_path)\n    # \u6309\u7167\u6587\u4ef6\u540d\u6392\u5e8f\uff0c\u4fdd\u8bc1 PDF \u4e2d\u56fe\u7247\u7684\u987a\u5e8f\n    sorted_images = sorted(image_paths)\n    print(sorted_images)\n    for img_path in sorted_images:\n        if not img_path.lower().endswith(('.jpg', '.jpeg', '.png')):\n            print(f\"\u8df3\u8fc7\u975e\u56fe\u7247\u6587\u4ef6: {img_path}\")\n            continue\n        try:\n            img_doc = fitz.open(os.path.join(image_path, img_path))\n            rect = img_doc[0].rect\n            page = doc.new_page(width=rect.width, height=rect.height)\n            # \u5c06\u56fe\u7247\u94fa\u6ee1\u6574\u4e2a\u9875\u9762\n            # page.show_pdf_page(rect, img_doc, 0)\n            page.insert_image(rect, filename=os.path.join(image_path, img_path))\n            img_doc.close()\n            print(f\"\u5df2\u6dfb\u52a0\u56fe\u7247: {os.path.basename(img_path)}\")\n        except Exception as e:\n            print(f\"\u5904\u7406\u56fe\u7247 {img_path} \u65f6\u51fa\u9519: {e}\")\n            continue\n            \n    if len(doc) == 0:\n        print(\"\u6ca1\u6709\u6709\u6548\u7684\u56fe\u7247\u53ef\u4ee5\u8f6c\u6362\u4e3a PDF\")\n        return\n        \n    doc.save(imgs_dir+\"\/\"+output_pdf_path)\n    doc.close()\n    print(f\"\u2705 \u6210\u529f\u751f\u6210 PDF \u6587\u4ef6: {imgs_dir}\/{output_pdf_path}\")\n\ndef protect_pdf(input_path, output_path, owner_pw, user_pw=\"\", can_print=True, can_copy=False):\n    try:\n        doc = fitz.open(input_path)\n        \n        # \u57fa\u7840\u6743\u9650\uff1a\u5141\u8bb8\u8f85\u52a9\u529f\u80fd\u8bfb\u53d6\n        permissions = 0\n        if can_print:\n            permissions |= fitz.PDF_PERM_PRINT\n        if can_copy:\n            permissions |= fitz.PDF_PERM_COPY\n        # \u5982\u679c\u9700\u8981\u5141\u8bb8\u6ce8\u91ca\/\u586b\u5199\u8868\u5355\uff0c\u53ef\u4ee5\u52a0\u4e0a fitz.PDF_PERM_ANNOTATE\n        \n        doc.save(\n            output_path,\n            encryption=fitz.PDF_ENCRYPT_AES_256,\n            owner_pw=owner_pw,\n            user_pw=user_pw,\n            permissions=permissions,\n        )\n        doc.close()\n        print(f\"\u2705 PDF \u5df2\u6210\u529f\u52a0\u5bc6\u5e76\u4fdd\u5b58\u81f3: {output_path}\")\n    except Exception as e:\n        print(f\"\u5904\u7406\u6587\u4ef6 {input_path} \u65f6\u51fa\u9519: {e}\")\n\n\ndef changepdf(input_path, output_path):\n    doc = fitz.open(input_path)\n\n    # \u91cd\u65b0\u6253\u5f00\uff0c\u4fee\u590d\u76f4\u63a5\u5bf9\u8c61\u95ee\u9898\n    doc = fitz.open(output_path)\n    # \u83b7\u53d6 Trailer \u4e2d\u7684 \/Encrypt \u952e\u7684\u7c7b\u578b\u548c\u503c\n    what, xref_or_str = doc.xref_get_key(-1, \"Encrypt\")\n\n    # \u5982\u679c\u8fd4\u56de\u7684\u662f \"null\" \u6216\u5176\u4ed6\uff0c\u8bf4\u660e\u6ca1\u52a0\u5bc6\uff1b\u5982\u679c\u662f \"xref\"\uff0c\u8bf4\u660e\u5df2\u7ecf\u662f\u95f4\u63a5\u5bf9\u8c61\n    # \u5982\u679c\u8fd4\u56de\u7684\u662f \"dict\"\uff0c\u8bf4\u660e\u5b83\u662f\u76f4\u63a5\u5bf9\u8c61\uff0c\u9700\u8981\u4fee\u590d\n    if what == \"dict\":\n        # \u5c06\u76f4\u63a5\u5bf9\u8c61\u8f6c\u6362\u4e3a\u95f4\u63a5\u5bf9\u8c61\n        # 1. \u521b\u5efa\u4e00\u4e2a\u65b0\u7684\u95f4\u63a5\u5bf9\u8c61\n        new_xref = doc.get_new_xref()\n        # 2. \u5c06\u539f\u6765\u7684\u76f4\u63a5\u5bf9\u8c61\u5185\u5bb9\u5199\u5165\u65b0\u7684\u95f4\u63a5\u5bf9\u8c61\n        doc.update_object(new_xref, xref_or_str)\n        # 3. \u5c06 Trailer \u4e2d\u7684 \/Encrypt \u6307\u5411\u8fd9\u4e2a\u65b0\u7684\u95f4\u63a5\u5bf9\u8c61\n        doc.xref_set_key(-1, \"Encrypt\", f\"{new_xref} 0 R\")\n        \n        # \u589e\u91cf\u4fdd\u5b58\u4fee\u590d\u7ed3\u679c\n        doc.saveIncr()\n\n    doc.close()\n    # \u6b64\u65f6\u751f\u6210\u7684 encrypted.pdf \u5c31\u53ef\u4ee5\u88ab PyHanko \u6b63\u5e38\u8bfb\u53d6\u4e86\n    print(f\"\u2705 PDF \u52a0\u5bc6\u683c\u5f0f\u5df2\u7ecf\u8f6c\u6362: {output_path}\")\n\n\ndef sign_pdf(input_path, output_path, cert_path, cert_pass, permission_pass, sig_meta):\n    # ====================== \u914d\u7f6e\u533a ======================\n    # input_path            # \u5f85\u7b7e\u540dPDF\n    # output_path           # \u7b7e\u540d\u540ePDF\n    # cert_path             # \u4f60\u7684PFX\u8bc1\u4e66\n    # cert_pass             # \u8bc1\u4e66\u5bc6\u7801\n    # ====================================================\n\n    # 1. \u52a0\u8f7dPFX\u8bc1\u4e66\n    signer = signers.SimpleSigner.load_pkcs12(\n        pfx_file=cert_path,\n        passphrase=cert_pass  # \u5bc6\u7801\u5fc5\u987b\u8f6cbytes\n    )\n\n    # 2. \u3010\u4e0d\u53ef\u89c1\u6570\u5b57\u7b7e\u540d\u3011\u6838\u5fc3\u4ee3\u7801\n    with open(input_path, \"rb\") as input_file,open(output_path, \"wb\") as output_file:\n        reader = PdfFileReader(input_file)\n        reader.decrypt(permission_pass)\n        input_file.seek(0)  # \u89e3\u5bc6\u540e\u91cd\u7f6e\u6587\u4ef6\u6307\u9488\u5230\u5f00\u5934\n        # \u589e\u91cf\u5199\u5165\uff08\u4e0d\u7834\u574f\u539fPDF\u7ed3\u6784\uff09\n        writer = IncrementalPdfFileWriter(input_file)\n        writer.encrypt(permission_pass)\n        \n        signers.sign_pdf(\n            writer,\n            signer=signer,\n            output=output_file,\n            signature_meta=sig_meta\n            \n        )\n\n    print(f\"\u2705 \u7b7e\u540d\u5b8c\u6210\uff01\u8f93\u51fa\u6587\u4ef6\uff1a{output_path}\")\n    print(\"\u2139\ufe0f  \u539fPDF\u7684\u6240\u6709\u8005\u5bc6\u7801\/\u6743\u9650\u5df2\u4fdd\u7559\")\n\n\n\nif __name__ == \"__main__\":\n    \n    pdf_to_jpg(pdf_file_path)\n    images_to_pdf(imgs_dir,file_name+\"_imgpdf.pdf\")\n    protect_pdf(imgs_dir+\"\/\"+file_name+\"_imgpdf.pdf\", imgs_dir+\"\/\"+file_name+\"_protected.pdf\", PERMISSION_PASS, \"\", can_print=True, can_copy=False)\n\n    sig_meta=PdfSignatureMetadata(\n        # \u7b7e\u540d\u57fa\u672c\u4fe1\u606f\n        reason=\"\u6587\u6863\u6b63\u5f0f\u7b7e\u7f72\",\n        location=\"\u5317\u4eac\",\n        name=\"\u7b7e\u540d\u4eba\u59d3\u540d\",   # \u2705 \u7528 name \u4ee3\u66ff contact\n        # PAdES \u7b49\u7ea7\uff08\u5e38\u7528\uff09\n        subfilter=SigSeedSubFilter.PADES,                # \u542f\u7528 PAdES\n\n        field_name=\"Signature1\",#  \u2705 \u5fc5\u987b\u6307\u5b9a\u4e00\u4e2a\u552f\u4e00\u7684\u5b57\u6bb5\u540d\n        md_algorithm=\"sha256\",\n        certify=False,\n        docmdp_permissions=MDPPerm.FILL_FORMS,  # \u2705 \u6b63\u786e\u540d\u5b57\u662f docmdp_permissions\n    )\n    changepdf(imgs_dir+\"\/\"+file_name+\"_protected.pdf\", imgs_dir+\"\/\"+file_name+\"_protected.pdf\")\n    sign_pdf(imgs_dir+\"\/\"+file_name+\"_protected.pdf\", imgs_dir+\"\/\"+file_name+\"_signed.pdf\", CERT_PATH, CERT_PASS, PERMISSION_PASS, sig_meta)\n\n    # \u6e05\u7406\u4e34\u65f6\u6587\u4ef6\n    os.remove(imgs_dir+\"\/\"+file_name+\"_imgpdf.pdf\")\n    os.remove(imgs_dir+\"\/\"+file_name+\"_protected.pdf\")\n    for f in glob.glob(imgs_dir+\"\/page_*.jpg\"):\n        os.remove(f)\n<\/pre>\n","protected":false},"excerpt":{"rendered":"<p>\u80cc\u666f \u4e4b\u524d\u4ecb\u7ecd\u8fc7\u5c0f\u5fae\u4f01\u4e1a\u7535\u5b50\u5408\u540c\u9632\u7be1\u6539\u5b9e\u73b0\uff0c\u624b\u52a8\u64cd\u4f5c\u6709\u4e9b\u7e41\u7410\uff0c\u80fd\u5426\u505a\u6210\u81ea\u52a8\u5316\u811a\u672c\uff1f \u529f\u80fd \u5b9e\u73b0 \u4e0a\u97624\u4e2a\u529f\u80fd\uff0c &hellip; <\/p>\n<p class=\"link-more\"><a href=\"https:\/\/thereisno.top\/?p=2874\" class=\"more-link\">\u7ee7\u7eed\u9605\u8bfb<span class=\"screen-reader-text\">\u201cpython \u81ea\u52a8\u5316pdf\u6570\u5b57\u8bc1\u4e66\u7b7e\u540d\u201d<\/span><\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[14],"tags":[9,308],"class_list":["post-2874","post","type-post","status-publish","format-standard","hentry","category-python","tag-python","tag-308"],"_links":{"self":[{"href":"https:\/\/thereisno.top\/index.php?rest_route=\/wp\/v2\/posts\/2874","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/thereisno.top\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/thereisno.top\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/thereisno.top\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/thereisno.top\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=2874"}],"version-history":[{"count":1,"href":"https:\/\/thereisno.top\/index.php?rest_route=\/wp\/v2\/posts\/2874\/revisions"}],"predecessor-version":[{"id":2875,"href":"https:\/\/thereisno.top\/index.php?rest_route=\/wp\/v2\/posts\/2874\/revisions\/2875"}],"wp:attachment":[{"href":"https:\/\/thereisno.top\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=2874"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/thereisno.top\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=2874"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/thereisno.top\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=2874"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}