{"id":1435,"date":"2024-10-29T17:04:57","date_gmt":"2024-10-29T09:04:57","guid":{"rendered":"https:\/\/thereisno.top\/?p=1435"},"modified":"2024-12-19T13:52:50","modified_gmt":"2024-12-19T05:52:50","slug":"%e7%a5%9e%e7%bb%8f%e7%bd%91%e7%bb%9c%e4%b9%8b%e5%8f%8d%e5%90%91%e4%bc%a0%e6%92%ad%e7%ae%97%e6%b3%95%ef%bc%88bp%ef%bc%89","status":"publish","type":"post","link":"https:\/\/thereisno.top\/?p=1435","title":{"rendered":"\u795e\u7ecf\u7f51\u7edc\u4e4b\u53cd\u5411\u4f20\u64ad\u7b97\u6cd5\uff08BP\uff09"},"content":{"rendered":"\n<p>\u53cd\u5411\u4f20\u64ad\uff08\u82f1\u8bed\uff1aBackpropagation\uff0c\u7f29\u5199\u4e3aBP\uff09\u662f\u201c\u8bef\u5dee\u53cd\u5411\u4f20\u64ad\u201d\u7684\u7b80\u79f0\uff0c\u662f\u4e00\u79cd\u4e0e\u6700\u4f18\u5316\u65b9\u6cd5\uff08\u5982\u68af\u5ea6\u4e0b\u964d\u6cd5\uff09\u7ed3\u5408\u4f7f\u7528\u7684\uff0c\u7528\u6765\u8bad\u7ec3\u4eba\u5de5\u795e\u7ecf\u7f51\u7edc\u7684\u5e38\u89c1\u65b9\u6cd5\u3002\u8be5\u65b9\u6cd5\u5bf9\u7f51\u7edc\u4e2d\u6240\u6709\u6743\u91cd\u8ba1\u7b97\u635f\u5931\u51fd\u6570\u7684\u68af\u5ea6\u3002\u8fd9\u4e2a\u68af\u5ea6\u4f1a\u53cd\u9988\u7ed9\u6700\u4f18\u5316\u65b9\u6cd5\uff0c\u7528\u6765\u66f4\u65b0\u6743\u503c\u4ee5\u6700\u5c0f\u5316\u635f\u5931\u51fd\u6570\u3002 \u5728\u795e\u7ecf\u7f51\u7edc\u4e0a\u6267\u884c\u68af\u5ea6\u4e0b\u964d\u6cd5\u7684\u4e3b\u8981\u7b97\u6cd5\u3002\u8be5\u7b97\u6cd5\u4f1a\u5148\u6309\u524d\u5411\u4f20\u64ad\u65b9\u5f0f\u8ba1\u7b97\uff08\u5e76\u7f13\u5b58\uff09\u6bcf\u4e2a\u8282\u70b9\u7684\u8f93\u51fa\u503c\uff0c\u7136\u540e\u518d\u6309\u53cd\u5411\u4f20\u64ad\u904d\u5386\u56fe\u7684\u65b9\u5f0f\u8ba1\u7b97\u635f\u5931\u51fd\u6570\u503c\u76f8\u5bf9\u4e8e\u6bcf\u4e2a\u53c2\u6570\u7684\u504f\u5bfc\u6570\u3002<\/p>\n<p>\u6211\u4eec\u5c06\u4ee5\u5168\u8fde\u63a5\u5c42\uff0c\u6fc0\u6d3b\u51fd\u6570\u91c7\u7528 <code>Sigmoid<\/code> \u51fd\u6570\uff0c\u8bef\u5dee\u51fd\u6570\u4e3a <code>Softmax+MSE<\/code> 
\u635f\u5931\u51fd\u6570\u7684\u795e\u7ecf\u7f51\u7edc\u4e3a\u4f8b\uff0c\u63a8\u5bfc\u5176\u68af\u5ea6\u4f20\u64ad\u65b9\u5f0f\u3002<\/p>\n<h2 id=\"\u51c6\u5907\u5de5\u4f5c\">\u51c6\u5907\u5de5\u4f5c<\/h2>\n<h3 id=\"sigmoid-\u51fd\u6570\u7684\u5bfc\u6570\">1\u3001Sigmoid \u51fd\u6570\u7684\u5bfc\u6570<\/h3>\n\n\n\n<!--more-->\n\n\n\n<p>\u56de\u987e <code>sigmoid<\/code> \u51fd\u6570\u7684\u8868\u8fbe\u5f0f\uff1a<\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mi>\u03c3<\/mi><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mi>x<\/mi><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mo>=<\/mo><mfrac><mn>1<\/mn><mrow><mn>1<\/mn><mo>+<\/mo><msup><mi>e<\/mi><mrow><mo>\u2212<\/mo><mi>x<\/mi><\/mrow><\/msup><\/mrow><\/mfrac><\/mrow><annotation encoding=\"application\/x-tex\">\n\\sigma(x) =\\frac{1}{1+e^{-x}}\n<\/annotation><\/semantics><\/math> \u5176\u5bfc\u6570\u4e3a\uff1a<\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mi>d<\/mi><mrow><mi>d<\/mi><mi>x<\/mi><\/mrow><\/mfrac><mi>\u03c3<\/mi><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mi>x<\/mi><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mo>=<\/mo><mfrac><mi>d<\/mi><mrow><mi>d<\/mi><mi>x<\/mi><\/mrow><\/mfrac><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mfrac><mn>1<\/mn><mrow><mn>1<\/mn><mo>+<\/mo><msup><mi>e<\/mi><mrow><mo>\u2212<\/mo><mi>x<\/mi><\/mrow><\/msup><\/mrow><\/mfrac><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{d}{dx}\\sigma(x) =\\frac{d}{dx} \\left(\\frac{1}{1+e^{-x}} \\right)\n<\/annotation><\/semantics><\/math> <math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mo>=<\/mo><mfrac><msup><mi>e<\/mi><mrow><mo>\u2212<\/mo><mi>x<\/mi><\/mrow><\/msup><msup><mrow><mo stretchy=\"true\" 
form=\"prefix\">(<\/mo><mn>1<\/mn><mo>+<\/mo><msup><mi>e<\/mi><mrow><mo>\u2212<\/mo><mi>x<\/mi><\/mrow><\/msup><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mn>2<\/mn><\/msup><\/mfrac><\/mrow><annotation encoding=\"application\/x-tex\">\n=\\frac{e^{-x}}{(1+e^{-x})^2}\n<\/annotation><\/semantics><\/math> <math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mo>=<\/mo><mfrac><mrow><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mn>1<\/mn><mo>+<\/mo><msup><mi>e<\/mi><mrow><mo>\u2212<\/mo><mi>x<\/mi><\/mrow><\/msup><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mo>\u2212<\/mo><mn>1<\/mn><\/mrow><msup><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mn>1<\/mn><mo>+<\/mo><msup><mi>e<\/mi><mrow><mo>\u2212<\/mo><mi>x<\/mi><\/mrow><\/msup><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mn>2<\/mn><\/msup><\/mfrac><\/mrow><annotation encoding=\"application\/x-tex\">\n=\\frac{(1 + e^{-x})-1}{(1+e^{-x})^2}\n<\/annotation><\/semantics><\/math> <math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mo>=<\/mo><mfrac><mrow><mn>1<\/mn><mo>+<\/mo><msup><mi>e<\/mi><mrow><mo>\u2212<\/mo><mi>x<\/mi><\/mrow><\/msup><\/mrow><msup><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mn>1<\/mn><mo>+<\/mo><msup><mi>e<\/mi><mrow><mo>\u2212<\/mo><mi>x<\/mi><\/mrow><\/msup><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mn>2<\/mn><\/msup><\/mfrac><mo>\u2212<\/mo><msup><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mfrac><mn>1<\/mn><mrow><mn>1<\/mn><mo>+<\/mo><msup><mi>e<\/mi><mrow><mo>\u2212<\/mo><mi>x<\/mi><\/mrow><\/msup><\/mrow><\/mfrac><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mn>2<\/mn><\/msup><\/mrow><annotation encoding=\"application\/x-tex\">\n=\\frac{1+e^{-x}}{(1+e^{-x})^2} &#8211; \\left(\\frac{1}{1+e^{-x}}\\right)^2\n<\/annotation><\/semantics><\/math> <math display=\"block\" 
xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mo>=<\/mo><mi>\u03c3<\/mi><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mi>x<\/mi><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mo>\u2212<\/mo><mi>\u03c3<\/mi><msup><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mi>x<\/mi><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mn>2<\/mn><\/msup><\/mrow><annotation encoding=\"application\/x-tex\">\n=\\sigma(x) - \\sigma(x)^2\n<\/annotation><\/semantics><\/math> <math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mo>=<\/mo><mi>\u03c3<\/mi><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mn>1<\/mn><mo>\u2212<\/mo><mi>\u03c3<\/mi><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><\/mrow><annotation encoding=\"application\/x-tex\">\n=\\sigma(1-\\sigma)\n<\/annotation><\/semantics><\/math> \u53ef\u4ee5\u770b\u5230,<code>Sigmoid<\/code> \u51fd\u6570\u7684\u5bfc\u6570\u8868\u8fbe\u5f0f\u6700\u7ec8\u53ef\u4ee5\u8868\u8fbe\u4e3a\u6fc0\u6d3b\u51fd\u6570\u7684\u8f93\u51fa\u503c\u7684\u7b80\u5355\u8fd0\u7b97,\u5229\u7528\u8fd9\u4e00\u6027\u8d28,\u5728\u795e\u7ecf\u7f51\u7edc\u7684\u68af\u5ea6\u8ba1\u7b97\u4e2d,\u901a\u8fc7\u7f13\u5b58\u6bcf\u5c42\u7684 Sigmoid \u51fd\u6570\u8f93\u51fa\u503c,\u5373\u53ef\u5728\u9700\u8981\u7684\u65f6\u5019\u8ba1\u7b97\u51fa\u5176\u5bfc\u6570\u3002Sigmoid \u51fd\u6570\u5bfc\u6570\u7684\u5b9e\u73b0\uff1a<\/p>\n<pre><code>\nimport numpy as np # \u5bfc\u5165 numpy\n\ndef sigmoid(x): # sigmoid \u51fd\u6570\n    return 1 \/ (1 + np.exp(-x))\n\ndef derivative(x): # sigmoid \u5bfc\u6570\u7684\u8ba1\u7b97\n    return sigmoid(x)*(1-sigmoid(x))\n<\/code><\/pre>\n<h3 id=\"\u5747\u65b9\u5dee\u51fd\u6570\u68af\u5ea6\">2\u3001\u5747\u65b9\u5dee\u51fd\u6570\u68af\u5ea6<\/h3>\n<p>\u5747\u65b9\u5dee\u635f\u5931\u51fd\u6570\u8868\u8fbe\u5f0f\u4e3a\uff1a <math display=\"block\" 
xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mi>L<\/mi><mo>=<\/mo><mfrac><mn>1<\/mn><mn>2<\/mn><\/mfrac><munderover><mo>\u2211<\/mo><mrow><mi>k<\/mi><mo>=<\/mo><mn>1<\/mn><\/mrow><mi>K<\/mi><\/munderover><msup><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>y<\/mi><mi>k<\/mi><\/msub><mo>\u2212<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mn>2<\/mn><\/msup><\/mrow><annotation encoding=\"application\/x-tex\">\nL =\\frac{1}{2}\\sum_{k=1}^{K}(y_k-o_k)^2\n<\/annotation><\/semantics><\/math> \u5176\u4e2d <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msub><mi>y<\/mi><mi>k<\/mi><\/msub><annotation encoding=\"application\/x-tex\">y_k<\/annotation><\/semantics><\/math> \u4e3a\u771f\u5b9e\u503c\uff0c<math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msub><mi>o<\/mi><mi>k<\/mi><\/msub><annotation encoding=\"application\/x-tex\">o_k<\/annotation><\/semantics><\/math>\u4e3a\u8f93\u51fa\u503c\u3002\u5219\u5b83\u7684\u504f\u5bfc\u6570 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>o<\/mi><mi>i<\/mi><\/msub><\/mrow><\/mfrac><annotation encoding=\"application\/x-tex\">\\frac{\\partial L}{\\partial o_i}<\/annotation><\/semantics><\/math> \u53ef\u4ee5\u5c55\u5f00\u4e3a\uff1a<\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>o<\/mi><mi>i<\/mi><\/msub><\/mrow><\/mfrac><mo>=<\/mo><mfrac><mn>1<\/mn><mn>2<\/mn><\/mfrac><munderover><mo>\u2211<\/mo><mrow><mi>k<\/mi><mo>=<\/mo><mn>1<\/mn><\/mrow><mi>K<\/mi><\/munderover><mfrac><mi>\u2202<\/mi><mrow><mi>\u2202<\/mi><msub><mi>o<\/mi><mi>i<\/mi><\/msub><\/mrow><\/mfrac><msup><mrow><mo stretchy=\"true\" 
form=\"prefix\">(<\/mo><msub><mi>y<\/mi><mi>k<\/mi><\/msub><mo>\u2212<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mn>2<\/mn><\/msup><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial L}{\\partial o_i} =\\frac{1}{2}\\sum_{k=1}^{K}\\frac{\\partial}{\\partial o_i}(y_k &#8211; o_k)^2\n<\/annotation><\/semantics><\/math> \u5229\u7528\u94fe\u5f0f\u6cd5\u5219\u5206\u89e3\u4e3a<\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>o<\/mi><mi>i<\/mi><\/msub><\/mrow><\/mfrac><mo>=<\/mo><mfrac><mn>1<\/mn><mn>2<\/mn><\/mfrac><munderover><mo>\u2211<\/mo><mrow><mi>k<\/mi><mo>=<\/mo><mn>1<\/mn><\/mrow><mi>K<\/mi><\/munderover><mo>\u22c5<\/mo><mn>2<\/mn><mo>\u22c5<\/mo><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>y<\/mi><mi>k<\/mi><\/msub><mo>\u2212<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mo>\u22c5<\/mo><mfrac><mrow><mi>\u2202<\/mi><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>y<\/mi><mi>k<\/mi><\/msub><mo>\u2212<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>o<\/mi><mi>i<\/mi><\/msub><\/mrow><\/mfrac><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial L}{\\partial o_i} =\\frac{1}{2}\\sum_{k=1}^{K}\\cdot2\\cdot(y_k-o_k)\\cdot\\frac{\\partial(y_k-o_k)}{\\partial o_i} \n<\/annotation><\/semantics><\/math> <math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>o<\/mi><mi>i<\/mi><\/msub><\/mrow><\/mfrac><mo>=<\/mo><munderover><mo>\u2211<\/mo><mrow><mi>k<\/mi><mo>=<\/mo><mn>1<\/mn><\/mrow><mi>K<\/mi><\/munderover><mrow><mo stretchy=\"true\" 
form=\"prefix\">(<\/mo><msub><mi>y<\/mi><mi>k<\/mi><\/msub><mo>\u2212<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mo>\u22c5<\/mo><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mo>\u2212<\/mo><mn>1<\/mn><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mo>\u22c5<\/mo><mfrac><mrow><mi>\u2202<\/mi><msub><mi>o<\/mi><mi>k<\/mi><\/msub><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>o<\/mi><mi>i<\/mi><\/msub><\/mrow><\/mfrac><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial L}{\\partial o_i} =\\sum_{k=1}^{K}(y_k-o_k)\\cdot(-1)\\cdot\\frac{\\partial o_k}{\\partial o_i}\n<\/annotation><\/semantics><\/math> <math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mfrac><mrow><mi>\u2202<\/mi><msub><mi>o<\/mi><mi>k<\/mi><\/msub><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>o<\/mi><mi>i<\/mi><\/msub><\/mrow><\/mfrac><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial o_k}{\\partial o_i}<\/annotation><\/semantics><\/math> \u4ec5\u5f53 <code>k =i<\/code> \u65f6\u624d\u4e3a <code>1<\/code>,\u5176\u4ed6\u70b9\u90fd\u4e3a<code> 0<\/code>, \u4e5f\u5c31\u662f\u8bf4<math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mfrac><mrow><mi>\u2202<\/mi><msub><mi>o<\/mi><mi>k<\/mi><\/msub><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>o<\/mi><mi>i<\/mi><\/msub><\/mrow><\/mfrac><annotation encoding=\"application\/x-tex\">\\frac{\\partial o_k}{\\partial o_i}<\/annotation><\/semantics><\/math> \u53ea\u4e0e\u7b2c <code>i <\/code>\u53f7\u8282\u70b9\u76f8\u5173\uff0c\u4e0e\u5176\u4ed6\u8282\u70b9\u65e0\u5173\uff0c\u56e0\u6b64\u4e0a\u5f0f\u4e2d\u7684\u6c42\u548c\u7b26\u53f7\u53ef\u4ee5\u53bb\u6389\uff0c\u5747\u65b9\u5dee\u7684\u5bfc\u6570\u53ef\u4ee5\u63a8\u5bfc\u4e3a<\/p>\n<p><math display=\"block\" 
xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>o<\/mi><mi>i<\/mi><\/msub><\/mrow><\/mfrac><mo>=<\/mo><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>o<\/mi><mi>i<\/mi><\/msub><mo>\u2212<\/mo><msub><mi>y<\/mi><mi>i<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial L}{\\partial o_i} =(o_i &#8211; y_i)\n<\/annotation><\/semantics><\/math><\/p>\n<h3 id=\"\u5355\u4e2a\u795e\u7ecf\u5143\u68af\u5ea6\">3\u3001\u5355\u4e2a\u795e\u7ecf\u5143\u68af\u5ea6<\/h3>\n<p>\u5bf9\u4e8e\u91c7\u7528 <code>Sigmoid<\/code> \u6fc0\u6d3b\u51fd\u6570\u7684\u795e\u7ecf\u5143\u6a21\u578b,\u5b83\u7684\u6570\u5b66\u6a21\u578b\u53ef\u4ee5\u5199\u4e3a<\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><msup><mi>o<\/mi><mn>1<\/mn><\/msup><mo>=<\/mo><mi>\u03c3<\/mi><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msup><mi>w<\/mi><mn>1<\/mn><\/msup><mi>x<\/mi><mo>+<\/mo><msup><mi>b<\/mi><mn>1<\/mn><\/msup><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><\/mrow><annotation encoding=\"application\/x-tex\">\no^1 =\\sigma(w^1x+b^1)\n<\/annotation><\/semantics><\/math> \u5176\u4e2d<\/p>\n<ul>\n<li>\u53d8\u91cf\u7684\u4e0a\u6807\u8868\u793a\u5c42\u6570\uff0c\u5982 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msup><mi>o<\/mi><mn>1<\/mn><\/msup><annotation encoding=\"application\/x-tex\">o^1<\/annotation><\/semantics><\/math> \u8868\u793a\u7b2c\u4e00\u4e2a\u9690\u85cf\u5c42\u7684\u8f93\u51fa<\/li>\n<li><code>x<\/code> \u8868\u793a\u7f51\u7edc\u7684\u8f93\u5165<\/li>\n<\/ul>\n<p>\u5355\u4e2a\u795e\u7ecf\u5143\u6a21\u578b\u5982\u4e0b\u56fe\u6240\u793a<\/p>\n<ul>\n<li>\u8f93\u5165\u8282\u70b9\u6570\u4e3a <code>J<\/code>\n<ul>\n<li>\u5176\u4e2d\u8f93\u5165\u7b2c<math display=\"inline\" 
xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mi>j<\/mi><annotation encoding=\"application\/x-tex\">j<\/annotation><\/semantics><\/math> \u4e2a\u8282\u70b9\u5230\u8f93\u51fa<math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msup><mi>o<\/mi><mn>1<\/mn><\/msup><annotation encoding=\"application\/x-tex\">o^1<\/annotation><\/semantics><\/math> \u7684\u6743\u503c\u8fde\u63a5\u8bb0\u4e3a <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msubsup><mi>w<\/mi><mrow><mi>j<\/mi><mn>1<\/mn><\/mrow><mn>1<\/mn><\/msubsup><annotation encoding=\"application\/x-tex\">w^1_{j1}<\/annotation><\/semantics><\/math><\/li>\n<\/ul><\/li>\n<li>\u4e0a\u6807\u8868\u793a\u6743\u503c\u5c5e\u4e8e\u7684\u5c42\u6570\uff0c\u4e0b\u6807\u8868\u793a\u5f53\u524d\u8fde\u63a5\u7684\u8d77\u59cb\u8282\u70b9\u53f7\u548c\u7ec8\u6b62\u8282\u70b9\u53f7\n<ul>\n<li>\u5982\u4e0b\u6807<math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mi>j<\/mi><mn>1<\/mn><\/mrow><annotation encoding=\"application\/x-tex\">j1<\/annotation><\/semantics><\/math> \u8868\u793a\u4e0a\u4e00\u5c42\u7684\u7b2c<math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mi>j<\/mi><annotation encoding=\"application\/x-tex\">j<\/annotation><\/semantics><\/math> \u53f7\u8282\u70b9\u5230\u5f53\u524d\u5c42\u7684 1 \u53f7\u8282\u70b9<\/li>\n<\/ul><\/li>\n<li>\u672a\u7ecf\u8fc7\u6fc0\u6d3b\u51fd\u6570\u7684\u8f93\u51fa\u53d8\u91cf\u4e3a<math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msubsup><mi>z<\/mi><mn>1<\/mn><mn>1<\/mn><\/msubsup><annotation encoding=\"application\/x-tex\">z_1^1<\/annotation><\/semantics><\/math>\uff0c\u7ecf\u8fc7\u6fc0\u6d3b\u51fd\u6570\u4e4b\u540e\u7684\u8f93\u51fa\u4e3a<math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msubsup><mi>o<\/mi><mn>1<\/mn><mn>1<\/mn><\/msubsup><annotation 
encoding=\"application\/x-tex\">o_1^1<\/annotation><\/semantics><\/math><\/li>\n<li>\u7531\u4e8e\u53ea\u6709\u4e00\u4e2a\u8f93\u51fa\u8282\u70b9\uff0c\u6545<math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><msubsup><mi>o<\/mi><mn>1<\/mn><mn>1<\/mn><\/msubsup><mo>=<\/mo><msup><mi>o<\/mi><mn>1<\/mn><\/msup><\/mrow><annotation encoding=\"application\/x-tex\">o_1^1 =o^1<\/annotation><\/semantics><\/math><\/li>\n<\/ul>\n<p><img decoding=\"async\" src=\"https:\/\/thereisno.top\/wp-content\/uploads\/2024\/10\/sjwl1.png\" alt=\"sjwl1.png\"><\/p>\n<p>\u4e0b\u9762\u6211\u4eec\u6765\u8ba1\u7b97\u5747\u65b9\u5dee\u635f\u5931\u51fd\u6570\u7684\u68af\u5ea6<\/p>\n<p>\u7531\u4e8e\u5355\u4e2a\u795e\u7ecf\u5143\u53ea\u6709\u4e00\u4e2a\u8f93\u51fa\uff0c\u90a3\u4e48\u635f\u5931\u51fd\u6570\u53ef\u4ee5\u8868\u793a\u4e3a<\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mi>L<\/mi><mo>=<\/mo><mfrac><mn>1<\/mn><mn>2<\/mn><\/mfrac><msup><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msubsup><mi>o<\/mi><mn>1<\/mn><mn>1<\/mn><\/msubsup><mo>\u2212<\/mo><mi>t<\/mi><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mn>2<\/mn><\/msup><\/mrow><annotation encoding=\"application\/x-tex\">\nL =\\frac{1}{2}(o_1^1 - t)^2\n<\/annotation><\/semantics><\/math> \u6dfb\u52a0 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mfrac><mn>1<\/mn><mn>2<\/mn><\/mfrac><annotation encoding=\"application\/x-tex\">\\frac{1}{2}<\/annotation><\/semantics><\/math> \u662f\u4e3a\u4e86\u8ba1\u7b97\u65b9\u4fbf\uff0c\u6211\u4eec\u4ee5\u6743\u503c\u8fde\u63a5\u7684\u7b2c <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mi>j<\/mi><mo>\u2208<\/mo><mrow><mo stretchy=\"true\" form=\"prefix\">[<\/mo><mn>1<\/mn><mo>,<\/mo><mi>J<\/mi><mo stretchy=\"true\" form=\"postfix\">]<\/mo><\/mrow><\/mrow><annotation 
encoding=\"application\/x-tex\">j\\in[1,J]<\/annotation><\/semantics><\/math> \u53f7\u8282\u70b9\u7684\u6743\u503c <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msub><mi>w<\/mi><mrow><mi>j<\/mi><mn>1<\/mn><\/mrow><\/msub><annotation encoding=\"application\/x-tex\">w_{j1}<\/annotation><\/semantics><\/math> \u4e3a\u4f8b\uff0c\u8003\u8651\u635f\u5931\u51fd\u6570 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mi>L<\/mi><annotation encoding=\"application\/x-tex\">L<\/annotation><\/semantics><\/math> \u5173\u4e8e <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msub><mi>w<\/mi><mrow><mi>j<\/mi><mn>1<\/mn><\/mrow><\/msub><annotation encoding=\"application\/x-tex\">w_{j1}<\/annotation><\/semantics><\/math> \u7684\u504f\u5bfc\u6570\uff0c\u5373 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>j<\/mi><mn>1<\/mn><\/mrow><\/msub><\/mrow><\/mfrac><annotation encoding=\"application\/x-tex\">\\frac{\\partial L}{\\partial w_{j1}}<\/annotation><\/semantics><\/math><\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>j<\/mi><mn>1<\/mn><\/mrow><\/msub><\/mrow><\/mfrac><mo>=<\/mo><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>o<\/mi><mn>1<\/mn><\/msub><mo>\u2212<\/mo><mi>t<\/mi><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mfrac><mrow><mi>\u2202<\/mi><msub><mi>o<\/mi><mn>1<\/mn><\/msub><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>j<\/mi><mn>1<\/mn><\/mrow><\/msub><\/mrow><\/mfrac><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial L}{\\partial w_{j1}} =(o_1 - t)\\frac{\\partial o_1}{\\partial 
w_{j1}}\n<\/annotation><\/semantics><\/math> \u7531\u4e8e <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><msub><mi>o<\/mi><mn>1<\/mn><\/msub><mo>=<\/mo><mi>\u03c3<\/mi><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>z<\/mi><mn>1<\/mn><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><\/mrow><annotation encoding=\"application\/x-tex\">o_1 =\\sigma(z_1)<\/annotation><\/semantics><\/math> \uff0c\u7531\u4e0a\u9762\u7684\u63a8\u5bfc\u53ef\u77e5 Sigmoid \u51fd\u6570\u7684\u5bfc\u6570 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mi>\u03c3<\/mi><mi>\u2032<\/mi><mo>=<\/mo><mi>\u03c3<\/mi><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mn>1<\/mn><mo>\u2212<\/mo><mi>\u03c3<\/mi><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><\/mrow><annotation encoding=\"application\/x-tex\">\\sigma&#39; =\\sigma(1-\\sigma)<\/annotation><\/semantics><\/math><\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>j<\/mi><mn>1<\/mn><\/mrow><\/msub><\/mrow><\/mfrac><mo>=<\/mo><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>o<\/mi><mn>1<\/mn><\/msub><mo>\u2212<\/mo><mi>t<\/mi><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mfrac><mrow><mi>\u2202<\/mi><mi>\u03c3<\/mi><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>z<\/mi><mn>1<\/mn><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>j<\/mi><mn>1<\/mn><\/mrow><\/msub><\/mrow><\/mfrac><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial L}{\\partial w_{j1}} =(o_1 &#8211; t)\\frac{\\partial \\sigma(z_1)}{\\partial w_{j1}}\n<\/annotation><\/semantics><\/math> <math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mo>=<\/mo><mrow><mo 
stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>o<\/mi><mn>1<\/mn><\/msub><mo>\u2212<\/mo><mi>t<\/mi><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mi>\u03c3<\/mi><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>z<\/mi><mn>1<\/mn><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mn>1<\/mn><mo>\u2212<\/mo><mi>\u03c3<\/mi><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>z<\/mi><mn>1<\/mn><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mfrac><mrow><mi>\u2202<\/mi><msub><mi>z<\/mi><mn>1<\/mn><\/msub><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>j<\/mi><mn>1<\/mn><\/mrow><\/msub><\/mrow><\/mfrac><\/mrow><annotation encoding=\"application\/x-tex\">\n=(o_1-t)\\sigma(z_1)(1-\\sigma(z_1))\\frac{\\partial z_1}{\\partial w_{j1}}\n<\/annotation><\/semantics><\/math> \u628a <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mi>\u03c3<\/mi><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>z<\/mi><mn>1<\/mn><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><\/mrow><annotation encoding=\"application\/x-tex\">\\sigma(z_1)<\/annotation><\/semantics><\/math> \u5199\u6210 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msub><mi>o<\/mi><mn>1<\/mn><\/msub><annotation encoding=\"application\/x-tex\">o_1<\/annotation><\/semantics><\/math><\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mo>=<\/mo><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>o<\/mi><mn>1<\/mn><\/msub><mo>\u2212<\/mo><mi>t<\/mi><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><msub><mi>o<\/mi><mn>1<\/mn><\/msub><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mn>1<\/mn><mo>\u2212<\/mo><msub><mi>o<\/mi><mn>1<\/mn><\/msub><mo stretchy=\"true\" 
form=\"postfix\">)<\/mo><\/mrow><mfrac><mrow><mi>\u2202<\/mi><msub><mi>z<\/mi><mn>1<\/mn><\/msub><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>j<\/mi><mn>1<\/mn><\/mrow><\/msub><\/mrow><\/mfrac><\/mrow><annotation encoding=\"application\/x-tex\">\n=(o_1-t)o_1(1-o_1)\\frac{\\partial z_1}{\\partial w_{j1}}\n<\/annotation><\/semantics><\/math> \u7531\u4e8e <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><msub><mi>z<\/mi><mn>1<\/mn><\/msub><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>j<\/mi><mn>1<\/mn><\/mrow><\/msub><\/mrow><\/mfrac><mo>=<\/mo><msub><mi>x<\/mi><mi>j<\/mi><\/msub><\/mrow><annotation encoding=\"application\/x-tex\">\\frac{\\partial z_1}{\\partial w_{j1}} =x_j<\/annotation><\/semantics><\/math><\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>j<\/mi><mn>1<\/mn><\/mrow><\/msub><\/mrow><\/mfrac><mo>=<\/mo><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>o<\/mi><mn>1<\/mn><\/msub><mo>\u2212<\/mo><mi>t<\/mi><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><msub><mi>o<\/mi><mn>1<\/mn><\/msub><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mn>1<\/mn><mo>\u2212<\/mo><msub><mi>o<\/mi><mn>1<\/mn><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><msub><mi>x<\/mi><mi>j<\/mi><\/msub><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial L}{\\partial w_{j1}} =(o_1-t)o_1(1-o_1)x_j\n<\/annotation><\/semantics><\/math> \u4ece\u4e0a\u5f0f\u53ef\u4ee5\u770b\u5230,\u8bef\u5dee\u5bf9\u6743\u503c <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msub><mi>w<\/mi><mrow><mi>j<\/mi><mn>1<\/mn><\/mrow><\/msub><annotation encoding=\"application\/x-tex\">w_{j1}<\/annotation><\/semantics><\/math> \u7684\u504f\u5bfc\u6570\u53ea\u4e0e\u8f93\u51fa\u503c <math 
display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msub><mi>o<\/mi><mn>1<\/mn><\/msub><annotation encoding=\"application\/x-tex\">o_1<\/annotation><\/semantics><\/math> \u3001\u771f\u5b9e\u503c <code>t<\/code> \u4ee5\u53ca\u5f53\u524d\u6743\u503c\u8fde\u63a5\u7684\u8f93\u5165 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msub><mi>x<\/mi><mi>j<\/mi><\/msub><annotation encoding=\"application\/x-tex\">x_j<\/annotation><\/semantics><\/math> \u6709\u5173<\/p>\n<h3 id=\"\u5168\u94fe\u63a5\u5c42\u68af\u5ea6\">4\u3001\u5168\u8fde\u63a5\u5c42\u68af\u5ea6<\/h3>\n<p>\u6211\u4eec\u628a\u5355\u4e2a\u795e\u7ecf\u5143\u6a21\u578b\u63a8\u5e7f\u5230\u5355\u5c42\u5168\u8fde\u63a5\u5c42\u7684\u7f51\u7edc\u4e0a,\u5982\u4e0b\u56fe\u6240\u793a\u3002\u8f93\u5165\u5c42\u901a\u8fc7\u4e00\u4e2a\u5168\u8fde\u63a5\u5c42\u5f97\u5230\u8f93\u51fa\u5411\u91cf <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msup><mi>o<\/mi><mn>1<\/mn><\/msup><annotation encoding=\"application\/x-tex\">o^1<\/annotation><\/semantics><\/math> ,\u4e0e\u771f\u5b9e\u6807\u7b7e\u5411\u91cf <code>t<\/code> \u8ba1\u7b97\u5747\u65b9\u5dee\u3002\u8f93\u5165\u8282\u70b9\u6570\u4e3a <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mi>J<\/mi><annotation encoding=\"application\/x-tex\">J<\/annotation><\/semantics><\/math> ,\u8f93\u51fa\u8282\u70b9\u6570\u4e3a <code>K<\/code> \u3002<\/p>\n<p><img decoding=\"async\" src=\"https:\/\/thereisno.top\/wp-content\/uploads\/2024\/10\/sjwl2.png\" alt=\"sjwl2.png\"><\/p>\n<p>\u4e0e\u5355\u4e2a\u795e\u7ecf\u5143\u4e0d\u540c\uff0c\u5168\u8fde\u63a5\u5c42\u6709\u591a\u4e2a\u8f93\u51fa\u8282\u70b9 <math display=\"inline\" 
xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><msubsup><mi>o<\/mi><mn>1<\/mn><mn>1<\/mn><\/msubsup><mo>,<\/mo><msubsup><mi>o<\/mi><mn>2<\/mn><mn>1<\/mn><\/msubsup><mo>,<\/mo><msubsup><mi>o<\/mi><mn>3<\/mn><mn>1<\/mn><\/msubsup><mo>,<\/mo><mi>.<\/mi><mi>.<\/mi><mi>.<\/mi><mo>,<\/mo><msubsup><mi>o<\/mi><mi>K<\/mi><mn>1<\/mn><\/msubsup><\/mrow><annotation encoding=\"application\/x-tex\">o_1^1, o_2^1, o_3^1,&#8230;,o_K^1<\/annotation><\/semantics><\/math> \uff0c\u6bcf\u4e2a\u8f93\u51fa\u8282\u70b9\u5bf9\u5e94\u4e0d\u540c\u771f\u5b9e\u6807\u7b7e <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><msub><mi>t<\/mi><mn>1<\/mn><\/msub><mo>,<\/mo><msub><mi>t<\/mi><mn>2<\/mn><\/msub><mo>,<\/mo><msub><mi>t<\/mi><mn>3<\/mn><\/msub><mo>,<\/mo><mi>.<\/mi><mi>.<\/mi><mi>.<\/mi><mo>,<\/mo><msub><mi>t<\/mi><mi>K<\/mi><\/msub><\/mrow><annotation encoding=\"application\/x-tex\">t_1, t_2, t_3,&#8230;, t_K<\/annotation><\/semantics><\/math> \uff0c\u5747\u65b9\u8bef\u5dee\u53ef\u4ee5\u8868\u793a\u4e3a<\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mi>L<\/mi><mo>=<\/mo><mfrac><mn>1<\/mn><mn>2<\/mn><\/mfrac><munderover><mo>\u2211<\/mo><mrow><mi>i<\/mi><mo>=<\/mo><mn>1<\/mn><\/mrow><mi>K<\/mi><\/munderover><msup><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msubsup><mi>o<\/mi><mi>i<\/mi><mn>1<\/mn><\/msubsup><mo>\u2212<\/mo><msub><mi>t<\/mi><mi>i<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mn>2<\/mn><\/msup><\/mrow><annotation encoding=\"application\/x-tex\">\nL =\\frac{1}{2}\\sum_{i=1}^K(o_i^1-t_i)^2\n<\/annotation><\/semantics><\/math> \u7531\u4e8e <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>j<\/mi><mi>k<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><annotation encoding=\"application\/x-tex\">\\frac{\\partial 
L}{\\partial w_{jk}}<\/annotation><\/semantics><\/math> \u53ea\u4e0e <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msubsup><mi>o<\/mi><mi>k<\/mi><mn>1<\/mn><\/msubsup><annotation encoding=\"application\/x-tex\">o_k^1<\/annotation><\/semantics><\/math> \u6709\u5173\u8054\uff0c\u4e0a\u5f0f\u4e2d\u7684\u6c42\u548c\u7b26\u53f7\u53ef\u4ee5\u53bb\u6389\uff0c\u5373<math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mi>i<\/mi><mo>=<\/mo><mi>k<\/mi><\/mrow><annotation encoding=\"application\/x-tex\">i =k<\/annotation><\/semantics><\/math><\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>j<\/mi><mi>k<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><mo>=<\/mo><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo>\u2212<\/mo><msub><mi>t<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mfrac><mrow><mi>\u2202<\/mi><msub><mi>o<\/mi><mi>k<\/mi><\/msub><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>j<\/mi><mi>k<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial L}{\\partial w_{jk}} =(o_k-t_k)\\frac{\\partial o_k}{\\partial w_{jk}}\n<\/annotation><\/semantics><\/math> \u5c06 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo>=<\/mo><mi>\u03c3<\/mi><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>z<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><\/mrow><annotation encoding=\"application\/x-tex\">o_k=\\sigma(z_k)<\/annotation><\/semantics><\/math> \u5e26\u5165<\/p>\n<p><math display=\"block\" 
xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>j<\/mi><mi>k<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><mo>=<\/mo><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo>\u2212<\/mo><msub><mi>t<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mfrac><mrow><mi>\u2202<\/mi><mi>\u03c3<\/mi><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>z<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>j<\/mi><mi>k<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial L}{\\partial w_{jk}} =(o_k-t_k)\\frac{\\partial \\sigma(z_k)}{\\partial w_{jk}}\n<\/annotation><\/semantics><\/math> \u8003\u8651 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mi>S<\/mi><mi>i<\/mi><mi>g<\/mi><mi>m<\/mi><mi>o<\/mi><mi>i<\/mi><mi>d<\/mi><\/mrow><annotation encoding=\"application\/x-tex\">Sigmoid<\/annotation><\/semantics><\/math> \u51fd\u6570\u7684\u5bfc\u6570 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mi>\u03c3<\/mi><mi>\u2032<\/mi><mo>=<\/mo><mi>\u03c3<\/mi><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mn>1<\/mn><mo>\u2212<\/mo><mi>\u03c3<\/mi><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><\/mrow><annotation encoding=\"application\/x-tex\">\\sigma&#39; =\\sigma(1-\\sigma)<\/annotation><\/semantics><\/math><\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>j<\/mi><mi>k<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><mo>=<\/mo><mrow><mo stretchy=\"true\" 
form=\"prefix\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo>\u2212<\/mo><msub><mi>t<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mi>\u03c3<\/mi><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>z<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mn>1<\/mn><mo>\u2212<\/mo><mi>\u03c3<\/mi><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>z<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mfrac><mrow><mi>\u2202<\/mi><msubsup><mi>z<\/mi><mi>k<\/mi><mn>1<\/mn><\/msubsup><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>j<\/mi><mi>k<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial L}{\\partial w_{jk}} =(o_k-t_k)\\sigma(z_k)(1-\\sigma(z_k))\\frac{\\partial z_k^1}{\\partial w_{jk}}\n<\/annotation><\/semantics><\/math> \u5c06 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mi>\u03c3<\/mi><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>z<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><\/mrow><annotation encoding=\"application\/x-tex\">\\sigma(z_k)<\/annotation><\/semantics><\/math> \u8bb0\u4e3a<\/p>\n<p><math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msub><mi>o<\/mi><mi>k<\/mi><\/msub><annotation encoding=\"application\/x-tex\">o_k<\/annotation><\/semantics><\/math><\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>j<\/mi><mi>k<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><mo>=<\/mo><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo>\u2212<\/mo><msub><mi>t<\/mi><mi>k<\/mi><\/msub><mo 
stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mn>1<\/mn><mo>\u2212<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mfrac><mrow><mi>\u2202<\/mi><msubsup><mi>z<\/mi><mi>k<\/mi><mn>1<\/mn><\/msubsup><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>j<\/mi><mi>k<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial L}{\\partial w_{jk}} =(o_k-t_k)o_k(1-o_k)\\frac{\\partial z_k^1}{\\partial w_{jk}}\n<\/annotation><\/semantics><\/math> \u6700\u7ec8\u53ef\u5f97 <math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>j<\/mi><mi>k<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><mo>=<\/mo><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo>\u2212<\/mo><msub><mi>t<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mn>1<\/mn><mo>\u2212<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mo>\u22c5<\/mo><msub><mi>x<\/mi><mi>j<\/mi><\/msub><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial L}{\\partial w_{jk}} =(o_k-t_k)o_k(1-o_k)\\cdot x_j\n<\/annotation><\/semantics><\/math> \u7531\u6b64\u53ef\u4ee5\u770b\u5230,\u67d0\u6761\u8fde\u63a5 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msub><mi>w<\/mi><mrow><mi>j<\/mi><mi>k<\/mi><\/mrow><\/msub><annotation encoding=\"application\/x-tex\">w_{jk}<\/annotation><\/semantics><\/math> \u4e0a\u9762\u7684\u504f\u5bfc\u6570,\u53ea\u4e0e\u5f53\u524d\u8fde\u63a5\u7684\u8f93\u51fa\u8282\u70b9 <math display=\"inline\" 
xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msubsup><mi>o<\/mi><mi>k<\/mi><mn>1<\/mn><\/msubsup><annotation encoding=\"application\/x-tex\">o_k^1<\/annotation><\/semantics><\/math> ,\u5bf9\u5e94\u7684\u771f\u5b9e\u503c\u8282\u70b9\u7684\u6807\u7b7e <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msubsup><mi>t<\/mi><mi>k<\/mi><mn>1<\/mn><\/msubsup><annotation encoding=\"application\/x-tex\">t_k^1<\/annotation><\/semantics><\/math> ,\u4ee5\u53ca\u5bf9\u5e94\u7684\u8f93\u5165\u8282\u70b9 <code>x<\/code> \u6709\u5173\u3002<\/p>\n<p>\u6211\u4eec\u4ee4 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><msub><mi>\u03b4<\/mi><mi>k<\/mi><\/msub><mo>=<\/mo><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo>\u2212<\/mo><msub><mi>t<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mn>1<\/mn><mo>\u2212<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><\/mrow><annotation encoding=\"application\/x-tex\">\\delta_k =(o_k-t_k)o_k(1-o_k)<\/annotation><\/semantics><\/math> \uff0c\u5219 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>j<\/mi><mi>k<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><annotation encoding=\"application\/x-tex\">\\frac{\\partial L}{\\partial w_{jk}}<\/annotation><\/semantics><\/math> \u53ef\u4ee5\u8868\u8fbe\u4e3a <math display=\"block\" 
xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>j<\/mi><mi>k<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><mo>=<\/mo><msub><mi>\u03b4<\/mi><mi>k<\/mi><\/msub><mo>\u22c5<\/mo><msub><mi>x<\/mi><mi>j<\/mi><\/msub><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial L}{\\partial w_{jk}}=\\delta_k\\cdot x_j\n<\/annotation><\/semantics><\/math> \u5176\u4e2d <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msub><mi>\u03b4<\/mi><mi>k<\/mi><\/msub><annotation encoding=\"application\/x-tex\">\\delta _k<\/annotation><\/semantics><\/math> \u53d8\u91cf\u8868\u5f81\u8fde\u63a5\u7ebf\u7684\u7ec8\u6b62\u8282\u70b9\u7684\u68af\u5ea6\u4f20\u64ad\u7684\u67d0\u79cd\u7279\u6027\uff0c\u4f7f\u7528 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msub><mi>\u03b4<\/mi><mi>k<\/mi><\/msub><annotation encoding=\"application\/x-tex\">\\delta_k<\/annotation><\/semantics><\/math> \u8868\u793a\u540e\uff0c <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>j<\/mi><mi>k<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><annotation encoding=\"application\/x-tex\">\\frac{\\partial L}{\\partial w_{jk}}<\/annotation><\/semantics><\/math> \u504f\u5bfc\u6570\u53ea\u4e0e\u5f53\u524d\u8fde\u63a5\u7684\u8d77\u59cb\u8282\u70b9 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msub><mi>x<\/mi><mi>j<\/mi><\/msub><annotation encoding=\"application\/x-tex\">x_j<\/annotation><\/semantics><\/math>\uff0c\u7ec8\u6b62\u8282\u70b9\u5904 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msub><mi>\u03b4<\/mi><mi>k<\/mi><\/msub><annotation encoding=\"application\/x-tex\">\\delta_k<\/annotation><\/semantics><\/math> 
\u6709\u5173\uff0c\u7406\u89e3\u8d77\u6765\u6bd4\u8f83\u76f4\u89c2\u3002<\/p>\n<h3 id=\"\u53cd\u5411\u4f20\u64ad\u7b97\u6cd5\">5\u3001\u53cd\u5411\u4f20\u64ad\u7b97\u6cd5<\/h3>\n<p>\u770b\u5230\u8fd9\u91cc\u5927\u5bb6\u4e5f\u4e0d\u5bb9\u6613\uff0c\u6bd5\u7adf\u8fd9\u4e48\u591a\u516c\u5f0f\u54c8\u54c8\u54c8\uff0c\u4e0d\u8fc7\u6fc0\u52a8\u7684\u65f6\u523b\u5230\u4e86<\/p>\n<p>\u5148\u56de\u987e\u4e0b\u8f93\u51fa\u5c42\u7684\u504f\u5bfc\u6570\u516c\u5f0f<\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>j<\/mi><mi>k<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><mo>=<\/mo><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo>\u2212<\/mo><msub><mi>t<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mn>1<\/mn><mo>\u2212<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mo>\u22c5<\/mo><msub><mi>x<\/mi><mi>j<\/mi><\/msub><mo>=<\/mo><msub><mi>\u03b4<\/mi><mi>k<\/mi><\/msub><mo>\u22c5<\/mo><msub><mi>x<\/mi><mi>j<\/mi><\/msub><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial L}{\\partial w_{jk}} =(o_k-t_k)o_k(1-o_k)\\cdot x_j =\\delta_k \\cdot x_j\n<\/annotation><\/semantics><\/math> \u591a\u5c42\u5168\u8fde\u63a5\u5c42\u5982\u4e0b\u56fe\u6240\u793a<\/p>\n<ul>\n<li>\u8f93\u51fa\u8282\u70b9\u6570\u4e3a <code>K<\/code> \uff0c\u8f93\u51fa <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><msup><mi>o<\/mi><mi>K<\/mi><\/msup><mo>=<\/mo><mrow><mo stretchy=\"true\" 
form=\"prefix\">[<\/mo><msubsup><mi>o<\/mi><mn>1<\/mn><mi>K<\/mi><\/msubsup><mo>,<\/mo><msubsup><mi>o<\/mi><mn>2<\/mn><mi>K<\/mi><\/msubsup><mo>,<\/mo><msubsup><mi>o<\/mi><mn>3<\/mn><mi>K<\/mi><\/msubsup><mo>,<\/mo><mi>.<\/mi><mi>.<\/mi><mi>.<\/mi><mo>,<\/mo><msubsup><mi>o<\/mi><mi>K<\/mi><mi>K<\/mi><\/msubsup><mo stretchy=\"true\" form=\"postfix\">]<\/mo><\/mrow><\/mrow><annotation encoding=\"application\/x-tex\">o^K =[o_1^K, o_2^K, o_3^K,&#8230;, o_K^K]<\/annotation><\/semantics><\/math><\/li>\n<li>\u5012\u6570\u7b2c\u4e8c\u5c42\u7684\u8282\u70b9\u6570\u4e3a <code>J<\/code> \uff0c\u8f93\u51fa\u4e3a <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><msup><mi>o<\/mi><mi>J<\/mi><\/msup><mo>=<\/mo><mrow><mo stretchy=\"true\" form=\"prefix\">[<\/mo><msubsup><mi>o<\/mi><mn>1<\/mn><mi>J<\/mi><\/msubsup><mo>,<\/mo><msubsup><mi>o<\/mi><mn>2<\/mn><mi>J<\/mi><\/msubsup><mo>,<\/mo><mi>.<\/mi><mi>.<\/mi><mi>.<\/mi><mo>,<\/mo><msubsup><mi>o<\/mi><mi>J<\/mi><mi>J<\/mi><\/msubsup><mo stretchy=\"true\" form=\"postfix\">]<\/mo><\/mrow><\/mrow><annotation encoding=\"application\/x-tex\">o^J=[o_1^J, o_2^J,&#8230;, o_J^J]<\/annotation><\/semantics><\/math><\/li>\n<li>\u5012\u6570\u7b2c\u4e09\u5c42\u7684\u8282\u70b9\u6570\u4e3a <code>I<\/code> \uff0c\u8f93\u51fa\u4e3a <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><msup><mi>o<\/mi><mi>I<\/mi><\/msup><mo>=<\/mo><mrow><mo stretchy=\"true\" form=\"prefix\">[<\/mo><msubsup><mi>o<\/mi><mn>1<\/mn><mi>I<\/mi><\/msubsup><mo>,<\/mo><msubsup><mi>o<\/mi><mn>2<\/mn><mi>I<\/mi><\/msubsup><mo>,<\/mo><mi>.<\/mi><mi>.<\/mi><mi>.<\/mi><mo>,<\/mo><msubsup><mi>o<\/mi><mi>I<\/mi><mi>I<\/mi><\/msubsup><mo stretchy=\"true\" form=\"postfix\">]<\/mo><\/mrow><\/mrow><annotation encoding=\"application\/x-tex\">o^I =[o_1^I, o_2^I,&#8230;, o_I^I]<\/annotation><\/semantics><\/math><\/li>\n<\/ul>\n<p><img decoding=\"async\" 
src=\"https:\/\/thereisno.top\/wp-content\/uploads\/2024\/10\/sjwl3.png\" alt=\"sjwl3.png\"><\/p>\n<p>\u5747\u65b9\u8bef\u5dee\u51fd\u6570<\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>i<\/mi><mi>j<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><mo>=<\/mo><mfrac><mi>\u2202<\/mi><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>i<\/mi><mi>j<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><mfrac><mn>1<\/mn><mn>2<\/mn><\/mfrac><munder><mo>\u2211<\/mo><mi>k<\/mi><\/munder><msup><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo>\u2212<\/mo><msub><mi>t<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mn>2<\/mn><\/msup><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial L}{\\partial w_{ij}}=\\frac{\\partial}{\\partial w_{ij}}\\frac{1}{2}\\sum_{k}(o_k-t_k)^2\n<\/annotation><\/semantics><\/math> \u7531\u4e8e <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mi>L<\/mi><annotation encoding=\"application\/x-tex\">L<\/annotation><\/semantics><\/math> \u901a\u8fc7\u6bcf\u4e2a\u8f93\u51fa\u8282\u70b9 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msub><mi>o<\/mi><mi>k<\/mi><\/msub><annotation encoding=\"application\/x-tex\">o_k<\/annotation><\/semantics><\/math> \u4e0e <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msub><mi>w<\/mi><mrow><mi>i<\/mi><mi>j<\/mi><\/mrow><\/msub><annotation encoding=\"application\/x-tex\">w_{ij}<\/annotation><\/semantics><\/math> \u76f8\u5173\u8054,\u6545\u6b64\u5904\u4e0d\u80fd\u53bb\u6389\u6c42\u548c\u7b26\u53f7<\/p>\n<p><math display=\"block\" 
xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>i<\/mi><mi>j<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><mo>=<\/mo><munder><mo>\u2211<\/mo><mi>k<\/mi><\/munder><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo>\u2212<\/mo><msub><mi>t<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mfrac><mrow><mi>\u2202<\/mi><msub><mi>o<\/mi><mi>k<\/mi><\/msub><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>i<\/mi><mi>j<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial L}{\\partial w_{ij}}=\\sum_k(o_k-t_k)\\frac{\\partial o_k}{\\partial w_{ij}}\n<\/annotation><\/semantics><\/math> \u5c06 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo>=<\/mo><mi>\u03c3<\/mi><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>z<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><\/mrow><annotation encoding=\"application\/x-tex\">o_k=\\sigma(z_k)<\/annotation><\/semantics><\/math> \u5e26\u5165<\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>i<\/mi><mi>j<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><mo>=<\/mo><munder><mo>\u2211<\/mo><mi>k<\/mi><\/munder><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo>\u2212<\/mo><msub><mi>t<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mfrac><mrow><mi>\u2202<\/mi><mi>\u03c3<\/mi><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>z<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" 
form=\"postfix\">)<\/mo><\/mrow><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>i<\/mi><mi>j<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial L}{\\partial w_{ij}}=\\sum_k(o_k-t_k)\\frac{\\partial \\sigma(z_k)}{\\partial w_{ij}}\n<\/annotation><\/semantics><\/math> <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mi>S<\/mi><mi>i<\/mi><mi>g<\/mi><mi>m<\/mi><mi>o<\/mi><mi>i<\/mi><mi>d<\/mi><\/mrow><annotation encoding=\"application\/x-tex\">Sigmoid<\/annotation><\/semantics><\/math> \u51fd\u6570\u7684\u5bfc\u6570 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mi>\u03c3<\/mi><mi>\u2032<\/mi><mo>=<\/mo><mi>\u03c3<\/mi><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mn>1<\/mn><mo>\u2212<\/mo><mi>\u03c3<\/mi><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><\/mrow><annotation encoding=\"application\/x-tex\">\\sigma&#39; =\\sigma(1-\\sigma)<\/annotation><\/semantics><\/math> \uff0c\u7ee7\u7eed\u6c42\u5bfc\uff0c\u5e76\u5c06 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mi>\u03c3<\/mi><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>z<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><\/mrow><annotation encoding=\"application\/x-tex\">\\sigma(z_k)<\/annotation><\/semantics><\/math> \u5199\u56de <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msub><mi>o<\/mi><mi>k<\/mi><\/msub><annotation encoding=\"application\/x-tex\">o_k<\/annotation><\/semantics><\/math><\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>i<\/mi><mi>j<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><mo>=<\/mo><munder><mo>\u2211<\/mo><mi>k<\/mi><\/munder><mrow><mo stretchy=\"true\" 
form=\"prefix\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo>\u2212<\/mo><msub><mi>t<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mn>1<\/mn><mo>\u2212<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mfrac><mrow><mi>\u2202<\/mi><msub><mi>z<\/mi><mi>k<\/mi><\/msub><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>i<\/mi><mi>j<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial L}{\\partial w_{ij}}=\\sum_k(o_k-t_k)o_k(1-o_k)\\frac{\\partial z_k}{\\partial w_{ij}}\n<\/annotation><\/semantics><\/math> \u5bf9\u4e8e <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mfrac><mrow><mi>\u2202<\/mi><msub><mi>z<\/mi><mi>k<\/mi><\/msub><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>i<\/mi><mi>j<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><annotation encoding=\"application\/x-tex\">\\frac{\\partial z_k}{\\partial w_{ij}}<\/annotation><\/semantics><\/math> \u53ef\u4ee5\u5e94\u7528\u94fe\u5f0f\u6cd5\u5219\u5206\u89e3\u4e3a<\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><msub><mi>z<\/mi><mi>k<\/mi><\/msub><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>i<\/mi><mi>j<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><mo>=<\/mo><mfrac><mrow><mi>\u2202<\/mi><msub><mi>z<\/mi><mi>k<\/mi><\/msub><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>o<\/mi><mi>j<\/mi><\/msub><\/mrow><\/mfrac><mo>\u22c5<\/mo><mfrac><mrow><mi>\u2202<\/mi><msub><mi>o<\/mi><mi>j<\/mi><\/msub><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>i<\/mi><mi>j<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial z_k}{\\partial w_{ij}} =\\frac{\\partial z_k}{\\partial o_j}\\cdot \\frac{\\partial o_j}{\\partial w_{ij}}\n<\/annotation><\/semantics><\/math> 
\u7531\u56fe\u53ef\u77e5 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>z<\/mi><mi>k<\/mi><\/msub><mo>=<\/mo><msub><mi>o<\/mi><mi>j<\/mi><\/msub><mo>\u22c5<\/mo><msub><mi>w<\/mi><mrow><mi>j<\/mi><mi>k<\/mi><\/mrow><\/msub><mo>+<\/mo><msub><mi>b<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><annotation encoding=\"application\/x-tex\">\\left(z_k =o_j \\cdot w_{jk} + b_k\\right)<\/annotation><\/semantics><\/math> ,\u6545\u6709<\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><msub><mi>z<\/mi><mi>k<\/mi><\/msub><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>o<\/mi><mi>j<\/mi><\/msub><\/mrow><\/mfrac><mo>=<\/mo><msub><mi>w<\/mi><mrow><mi>j<\/mi><mi>k<\/mi><\/mrow><\/msub><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial z_k}{\\partial o_j} =w_{jk}\n<\/annotation><\/semantics><\/math> \u6240\u4ee5<\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>i<\/mi><mi>j<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><mo>=<\/mo><munder><mo>\u2211<\/mo><mi>k<\/mi><\/munder><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo>\u2212<\/mo><msub><mi>t<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mn>1<\/mn><mo>\u2212<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><msub><mi>w<\/mi><mrow><mi>j<\/mi><mi>k<\/mi><\/mrow><\/msub><mo>\u22c5<\/mo><mfrac><mrow><mi>\u2202<\/mi><msub><mi>o<\/mi><mi>j<\/mi><\/msub><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>i<\/mi><mi>j<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><\/mrow><annotation 
encoding=\"application\/x-tex\">\n\\frac{\\partial L}{\\partial w_{ij}}=\\sum_k(o_k-t_k)o_k(1-o_k)w_{jk}\\cdot\\frac{\\partial o_j}{\\partial w_{ij}}\n<\/annotation><\/semantics><\/math> \u8003\u8651\u5230 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mfrac><mrow><mi>\u2202<\/mi><msub><mi>o<\/mi><mi>j<\/mi><\/msub><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>i<\/mi><mi>j<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><annotation encoding=\"application\/x-tex\">\\frac{\\partial o_j}{\\partial w_{ij}}<\/annotation><\/semantics><\/math> \u4e0e <code>k<\/code> \u65e0\u5173\uff0c\u53ef\u5c06\u5176\u63d0\u53d6\u51fa\u6765<\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>i<\/mi><mi>j<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><mo>=<\/mo><mfrac><mrow><mi>\u2202<\/mi><msub><mi>o<\/mi><mi>j<\/mi><\/msub><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>i<\/mi><mi>j<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><mo>\u22c5<\/mo><munder><mo>\u2211<\/mo><mi>k<\/mi><\/munder><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo>\u2212<\/mo><msub><mi>t<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mn>1<\/mn><mo>\u2212<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><msub><mi>w<\/mi><mrow><mi>j<\/mi><mi>k<\/mi><\/mrow><\/msub><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial L}{\\partial w_{ij}}=\\frac{\\partial o_j}{\\partial w_{ij}}\\cdot\\sum_k(o_k-t_k)o_k(1-o_k)w_{jk}\n<\/annotation><\/semantics><\/math> \u518d\u4e00\u6b21\u6709 <math display=\"inline\" 
xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><msub><mi>o<\/mi><mi>j<\/mi><\/msub><mo>=<\/mo><mi>\u03c3<\/mi><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>z<\/mi><mi>j<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><\/mrow><annotation encoding=\"application\/x-tex\">o_j=\\sigma(z_j)<\/annotation><\/semantics><\/math> \uff0c\u5e76\u5229\u7528 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mi>S<\/mi><mi>i<\/mi><mi>g<\/mi><mi>m<\/mi><mi>o<\/mi><mi>i<\/mi><mi>d<\/mi><\/mrow><annotation encoding=\"application\/x-tex\">Sigmoid<\/annotation><\/semantics><\/math> \u51fd\u6570\u7684\u5bfc\u6570 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mi>\u03c3<\/mi><mi>\u2032<\/mi><mo>=<\/mo><mi>\u03c3<\/mi><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mn>1<\/mn><mo>\u2212<\/mo><mi>\u03c3<\/mi><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><\/mrow><annotation encoding=\"application\/x-tex\">\\sigma&#39; =\\sigma(1-\\sigma)<\/annotation><\/semantics><\/math>\u6709<\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>i<\/mi><mi>j<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><mo>=<\/mo><msub><mi>o<\/mi><mi>j<\/mi><\/msub><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mn>1<\/mn><mo>\u2212<\/mo><msub><mi>o<\/mi><mi>j<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mfrac><mrow><mi>\u2202<\/mi><msub><mi>z<\/mi><mi>j<\/mi><\/msub><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>i<\/mi><mi>j<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><mo>\u22c5<\/mo><munder><mo>\u2211<\/mo><mi>k<\/mi><\/munder><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo>\u2212<\/mo><msub><mi>t<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" 
form=\"postfix\">)<\/mo><\/mrow><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mn>1<\/mn><mo>\u2212<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><msub><mi>w<\/mi><mrow><mi>j<\/mi><mi>k<\/mi><\/mrow><\/msub><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial L}{\\partial w_{ij}}=o_j(1-o_j)\\frac{\\partial z_j}{\\partial w_{ij}} \\cdot\\sum_k(o_k-t_k)o_k(1-o_k)w_{jk}\n<\/annotation><\/semantics><\/math> \u7531\u4e8e <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><msub><mi>z<\/mi><mi>j<\/mi><\/msub><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>i<\/mi><mi>j<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><mo>=<\/mo><msub><mi>o<\/mi><mi>i<\/mi><\/msub><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>z<\/mi><mi>j<\/mi><\/msub><mo>=<\/mo><msub><mi>o<\/mi><mi>i<\/mi><\/msub><mo>\u22c5<\/mo><msub><mi>w<\/mi><mrow><mi>i<\/mi><mi>j<\/mi><\/mrow><\/msub><mo>+<\/mo><msub><mi>b<\/mi><mi>j<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><\/mrow><annotation encoding=\"application\/x-tex\">\\frac{\\partial z_j}{\\partial w_{ij}} =o_i \\left(z_j =o_i\\cdot w_{ij} + b_j\\right)<\/annotation><\/semantics><\/math><\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>i<\/mi><mi>j<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><mo>=<\/mo><msub><mi>o<\/mi><mi>j<\/mi><\/msub><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mn>1<\/mn><mo>\u2212<\/mo><msub><mi>o<\/mi><mi>j<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><msub><mi>o<\/mi><mi>i<\/mi><\/msub><mo>\u22c5<\/mo><munder><mo>\u2211<\/mo><mi>k<\/mi><\/munder><mrow><mo stretchy=\"true\" 
form=\"prefix\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo>\u2212<\/mo><msub><mi>t<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mn>1<\/mn><mo>\u2212<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><msub><mi>w<\/mi><mrow><mi>j<\/mi><mi>k<\/mi><\/mrow><\/msub><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial L}{\\partial w_{ij}}=o_j(1-o_j)o_i \\cdot\\sum_k(o_k-t_k)o_k(1-o_k)w_{jk}\n<\/annotation><\/semantics><\/math> \u5176\u4e2d <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><msubsup><mi>\u03b4<\/mi><mi>k<\/mi><mi>K<\/mi><\/msubsup><mo>=<\/mo><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo>\u2212<\/mo><msub><mi>t<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mn>1<\/mn><mo>\u2212<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><\/mrow><annotation encoding=\"application\/x-tex\">\\delta _k^K =(o_k-t_k)o_k(1-o_k)<\/annotation><\/semantics><\/math> \uff0c\u5219<\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>i<\/mi><mi>j<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><mo>=<\/mo><msub><mi>o<\/mi><mi>j<\/mi><\/msub><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mn>1<\/mn><mo>\u2212<\/mo><msub><mi>o<\/mi><mi>j<\/mi><\/msub><mo stretchy=\"true\" 
form=\"postfix\">)<\/mo><\/mrow><msub><mi>o<\/mi><mi>i<\/mi><\/msub><mo>\u22c5<\/mo><munder><mo>\u2211<\/mo><mi>k<\/mi><\/munder><msubsup><mi>\u03b4<\/mi><mi>k<\/mi><mi>K<\/mi><\/msubsup><mo>\u22c5<\/mo><msub><mi>w<\/mi><mrow><mi>j<\/mi><mi>k<\/mi><\/mrow><\/msub><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial L}{\\partial w_{ij}}=o_j(1-o_j)o_i \\cdot\\sum_k\\delta _k^K\\cdot w_{jk}\n<\/annotation><\/semantics><\/math> \u4eff\u7167\u8f93\u51fa\u5c42\u7684\u4e66\u5199\u65b9\u5f0f\uff0c\u5b9a\u4e49<\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><msubsup><mi>\u03b4<\/mi><mi>j<\/mi><mi>J<\/mi><\/msubsup><mo>=<\/mo><msub><mi>o<\/mi><mi>j<\/mi><\/msub><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mn>1<\/mn><mo>\u2212<\/mo><msub><mi>o<\/mi><mi>j<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mo>\u22c5<\/mo><munder><mo>\u2211<\/mo><mi>k<\/mi><\/munder><msubsup><mi>\u03b4<\/mi><mi>k<\/mi><mi>K<\/mi><\/msubsup><mo>\u22c5<\/mo><msub><mi>w<\/mi><mrow><mi>j<\/mi><mi>k<\/mi><\/mrow><\/msub><\/mrow><annotation encoding=\"application\/x-tex\">\n\\delta_j^J =o_j(1-o_j) \\cdot \\sum_k \\delta _k^K\\cdot w_{jk}\n<\/annotation><\/semantics><\/math> \u6b64\u65f6 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>i<\/mi><mi>j<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><annotation encoding=\"application\/x-tex\">\\frac{\\partial L}{\\partial w_{ij}}<\/annotation><\/semantics><\/math> \u53ef\u4ee5\u5199\u4e3a\u5f53\u524d\u8fde\u63a5\u7684\u8d77\u59cb\u8282\u70b9\u7684\u8f93\u51fa\u503c <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msub><mi>o<\/mi><mi>i<\/mi><\/msub><annotation encoding=\"application\/x-tex\">o_i<\/annotation><\/semantics><\/math> \u4e0e\u7ec8\u6b62\u8282\u70b9 <math display=\"inline\" 
xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mi>j<\/mi><annotation encoding=\"application\/x-tex\">j<\/annotation><\/semantics><\/math> \u7684\u68af\u5ea6\u4fe1\u606f <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msubsup><mi>\u03b4<\/mi><mi>j<\/mi><mi>J<\/mi><\/msubsup><annotation encoding=\"application\/x-tex\">\\delta _j^J<\/annotation><\/semantics><\/math> \u7684\u7b80\u5355\u76f8\u4e58\u8fd0\u7b97\uff1a<\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>i<\/mi><mi>j<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><mo>=<\/mo><msubsup><mi>\u03b4<\/mi><mi>j<\/mi><mi>J<\/mi><\/msubsup><mo>\u22c5<\/mo><msubsup><mi>o<\/mi><mi>i<\/mi><mi>I<\/mi><\/msubsup><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial L}{\\partial w_{ij}} =\\delta_j^J\\cdot o_i^I\n<\/annotation><\/semantics><\/math> \u901a\u8fc7\u5b9a\u4e49 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mi>\u03b4<\/mi><annotation encoding=\"application\/x-tex\">\\delta<\/annotation><\/semantics><\/math> \u53d8\u91cf\uff0c\u6bcf\u4e00\u5c42\u7684\u68af\u5ea6\u8868\u8fbe\u5f0f\u53d8\u5f97\u66f4\u52a0\u6e05\u6670\u7b80\u6d01\uff0c\u5176\u4e2d <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mi>\u03b4<\/mi><annotation encoding=\"application\/x-tex\">\\delta<\/annotation><\/semantics><\/math> \u53ef\u4ee5\u7b80\u5355\u7406\u89e3\u4e3a\u5f53\u524d\u8fde\u63a5 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msub><mi>w<\/mi><mrow><mi>i<\/mi><mi>j<\/mi><\/mrow><\/msub><annotation encoding=\"application\/x-tex\">w_{ij}<\/annotation><\/semantics><\/math> \u5bf9\u8bef\u5dee\u51fd\u6570\u7684\u8d21\u732e\u503c\u3002<\/p>\n<h3 id=\"\u603b\u7ed3\">6\u3001\u603b\u7ed3<\/h3>\n<p>\u8f93\u51fa\u5c42\uff1a<\/p>\n<p><math 
display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>j<\/mi><mi>k<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><mo>=<\/mo><msubsup><mi>\u03b4<\/mi><mi>k<\/mi><mi>K<\/mi><\/msubsup><mo>\u22c5<\/mo><msub><mi>o<\/mi><mi>j<\/mi><\/msub><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial L}{\\partial w_{jk}} =\\delta _k^K\\cdot o_j\n<\/annotation><\/semantics><\/math> <math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><msubsup><mi>\u03b4<\/mi><mi>k<\/mi><mi>K<\/mi><\/msubsup><mo>=<\/mo><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo>\u2212<\/mo><msub><mi>t<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mn>1<\/mn><mo>\u2212<\/mo><msub><mi>o<\/mi><mi>k<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><\/mrow><annotation encoding=\"application\/x-tex\">\n\\delta _k^K =(o_k-t_k)o_k(1-o_k)\n<\/annotation><\/semantics><\/math> \u5012\u6570\u7b2c\u4e8c\u5c42\uff1a<\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>i<\/mi><mi>j<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><mo>=<\/mo><msubsup><mi>\u03b4<\/mi><mi>j<\/mi><mi>J<\/mi><\/msubsup><mo>\u22c5<\/mo><msub><mi>o<\/mi><mi>i<\/mi><\/msub><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial L}{\\partial w_{ij}} =\\delta _j^J\\cdot o_i\n<\/annotation><\/semantics><\/math> <math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><msubsup><mi>\u03b4<\/mi><mi>j<\/mi><mi>J<\/mi><\/msubsup><mo>=<\/mo><msub><mi>o<\/mi><mi>j<\/mi><\/msub><mrow><mo stretchy=\"true\" 
form=\"prefix\">(<\/mo><mn>1<\/mn><mo>\u2212<\/mo><msub><mi>o<\/mi><mi>j<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mo>\u22c5<\/mo><munder><mo>\u2211<\/mo><mi>k<\/mi><\/munder><msubsup><mi>\u03b4<\/mi><mi>k<\/mi><mi>K<\/mi><\/msubsup><mo>\u22c5<\/mo><msub><mi>w<\/mi><mrow><mi>j<\/mi><mi>k<\/mi><\/mrow><\/msub><\/mrow><annotation encoding=\"application\/x-tex\">\n\\delta_j^J =o_j(1-o_j) \\cdot \\sum_k \\delta _k^K\\cdot w_{jk}\n<\/annotation><\/semantics><\/math> \u5012\u6570\u7b2c\u4e09\u5c42\uff1a<\/p>\n<p><math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><mfrac><mrow><mi>\u2202<\/mi><mi>L<\/mi><\/mrow><mrow><mi>\u2202<\/mi><msub><mi>w<\/mi><mrow><mi>n<\/mi><mi>i<\/mi><\/mrow><\/msub><\/mrow><\/mfrac><mo>=<\/mo><msubsup><mi>\u03b4<\/mi><mi>i<\/mi><mi>I<\/mi><\/msubsup><mo>\u22c5<\/mo><msub><mi>o<\/mi><mi>n<\/mi><\/msub><\/mrow><annotation encoding=\"application\/x-tex\">\n\\frac{\\partial L}{\\partial w_{ni}} =\\delta _i^I\\cdot o_n\n<\/annotation><\/semantics><\/math> <math display=\"block\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><msubsup><mi>\u03b4<\/mi><mi>i<\/mi><mi>I<\/mi><\/msubsup><mo>=<\/mo><msub><mi>o<\/mi><mi>i<\/mi><\/msub><mrow><mo stretchy=\"true\" form=\"prefix\">(<\/mo><mn>1<\/mn><mo>\u2212<\/mo><msub><mi>o<\/mi><mi>i<\/mi><\/msub><mo stretchy=\"true\" form=\"postfix\">)<\/mo><\/mrow><mo>\u22c5<\/mo><munder><mo>\u2211<\/mo><mi>j<\/mi><\/munder><msubsup><mi>\u03b4<\/mi><mi>j<\/mi><mi>J<\/mi><\/msubsup><mo>\u22c5<\/mo><msub><mi>w<\/mi><mrow><mi>i<\/mi><mi>j<\/mi><\/mrow><\/msub><\/mrow><annotation encoding=\"application\/x-tex\">\n\\delta _i^I =o_i(1-o_i)\\cdot \\sum_j\\delta_j^J\\cdot w_{ij}\n<\/annotation><\/semantics><\/math> \u5176\u4e2d <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><msub><mi>o<\/mi><mi>n<\/mi><\/msub><annotation encoding=\"application\/x-tex\">o_n<\/annotation><\/semantics><\/math> 
\u4e3a\u5012\u6570\u7b2c\u4e09\u5c42\u7684\u8f93\u5165\uff0c\u5373\u5012\u6570\u7b2c\u56db\u5c42\u7684\u8f93\u51fa\u3002<\/p>\n<p>\u4f9d\u7167\u6b64\u89c4\u5f8b\uff0c\u53ea\u9700\u8981\u5faa\u73af\u8fed\u4ee3\u8ba1\u7b97\u6bcf\u4e00\u5c42\u6bcf\u4e2a\u8282\u70b9\u7684 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mrow><msubsup><mi>\u03b4<\/mi><mi>k<\/mi><mi>K<\/mi><\/msubsup><mo>,<\/mo><msubsup><mi>\u03b4<\/mi><mi>j<\/mi><mi>J<\/mi><\/msubsup><mo>,<\/mo><msubsup><mi>\u03b4<\/mi><mi>i<\/mi><mi>I<\/mi><\/msubsup><mo>,<\/mo><mi>.<\/mi><mi>.<\/mi><mi>.<\/mi><\/mrow><annotation encoding=\"application\/x-tex\">\\delta _k^K, \\delta_j^J, \\delta_i^I,\\dots<\/annotation><\/semantics><\/math> \u7b49\u503c\u5373\u53ef\u6c42\u5f97\u5f53\u524d\u5c42\u7684\u504f\u5bfc\u6570\uff0c\u4ece\u800c\u5f97\u5230\u6bcf\u5c42\u6743\u503c\u77e9\u9635 <math display=\"inline\" xmlns=\"http:\/\/www.w3.org\/1998\/Math\/MathML\"><semantics><mi>W<\/mi><annotation encoding=\"application\/x-tex\">W<\/annotation><\/semantics><\/math> \u7684\u68af\u5ea6\uff0c\u518d\u901a\u8fc7\u68af\u5ea6\u4e0b\u964d\u7b97\u6cd5\u8fed\u4ee3\u4f18\u5316\u7f51\u7edc\u53c2\u6570\u5373\u53ef\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u53cd\u5411\u4f20\u64ad\uff08\u82f1\u8bed\uff1aBackpropagation\uff0c\u7f29\u5199\u4e3aBP\uff09\u662f\u201c\u8bef\u5dee\u53cd\u5411\u4f20\u64ad\u201d\u7684\u7b80\u79f0\uff0c\u662f\u4e00\u79cd\u4e0e\u6700\u4f18\u5316\u65b9\u6cd5\uff08\u5982\u68af &hellip; <\/p>\n<p class=\"link-more\"><a href=\"https:\/\/thereisno.top\/?p=1435\" class=\"more-link\">\u7ee7\u7eed\u9605\u8bfb<span 
class=\"screen-reader-text\">\u201c\u795e\u7ecf\u7f51\u7edc\u4e4b\u53cd\u5411\u4f20\u64ad\u7b97\u6cd5\uff08BP\uff09\u201d<\/span><\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[246,103,91],"tags":[166],"class_list":["post-1435","post","type-post","status-publish","format-standard","hentry","category-ai","category-103","category-91","tag-166"],"_links":{"self":[{"href":"https:\/\/thereisno.top\/index.php?rest_route=\/wp\/v2\/posts\/1435","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/thereisno.top\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/thereisno.top\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/thereisno.top\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/thereisno.top\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=1435"}],"version-history":[{"count":11,"href":"https:\/\/thereisno.top\/index.php?rest_route=\/wp\/v2\/posts\/1435\/revisions"}],"predecessor-version":[{"id":1503,"href":"https:\/\/thereisno.top\/index.php?rest_route=\/wp\/v2\/posts\/1435\/revisions\/1503"}],"wp:attachment":[{"href":"https:\/\/thereisno.top\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=1435"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/thereisno.top\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=1435"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/thereisno.top\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=1435"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}