{"id":3331,"date":"2026-04-11T18:55:00","date_gmt":"2026-04-11T10:55:00","guid":{"rendered":"https:\/\/100bjy.com\/index.php\/2026\/04\/11\/ai%e6%a8%a1%e5%9e%8b%e8%a8%93%e7%b7%b4%e7%ad%96%e7%95%a5%e6%96%b0%e9%80%b2%e5%b1%95-rlsd%e5%85%bc%e9%a1%a7%e7%a9%a9%e5%ae%9a%e8%88%87%e6%95%88%e7%8e%87\/"},"modified":"2026-04-11T18:55:00","modified_gmt":"2026-04-11T10:55:00","slug":"ai%e6%a8%a1%e5%9e%8b%e8%a8%93%e7%b7%b4%e7%ad%96%e7%95%a5%e6%96%b0%e9%80%b2%e5%b1%95-rlsd%e5%85%bc%e9%a1%a7%e7%a9%a9%e5%ae%9a%e8%88%87%e6%95%88%e7%8e%87","status":"publish","type":"post","link":"https:\/\/100bjy.com\/index.php\/2026\/04\/11\/ai%e6%a8%a1%e5%9e%8b%e8%a8%93%e7%b7%b4%e7%ad%96%e7%95%a5%e6%96%b0%e9%80%b2%e5%b1%95-rlsd%e5%85%bc%e9%a1%a7%e7%a9%a9%e5%ae%9a%e8%88%87%e6%95%88%e7%8e%87\/","title":{"rendered":"AI\u6a21\u578b\u8a13\u7df4\u7b56\u7565\u65b0\u9032\u5c55 RLSD\u517c\u9867\u7a69\u5b9a\u8207\u6548\u7387"},"content":{"rendered":"<div class=\"content-inner \">\n<figure><figcaption>\u5716\uff0f\u672c\u5831AI\u88fd\u5716\uff08\u793a\u610f\u5716\uff09<\/figcaption><\/figure>\n<p>\u5546\u50b3\u5a92\uff5c\u8449\u5b89\u5ead\uff0f\u7d9c\u5408\u5916\u96fb\u5831\u5c0e<\/p>\n<p>\u4eba\u5de5\u667a\u6167\u9818\u57df\u7814\u7a76\u4eba\u54e1\u8fd1\u65e5\u63d0\u51fa\u4e00\u9805\u540d\u70ba\u300c\u81ea\u6211\u84b8\u993e\u5f37\u5316\u5b78\u7fd2\u300d\uff08RLSD\uff09\u7684\u65b0\u578b\u7b56\u7565\uff0c\u65e8\u5728\u89e3\u6c7a\u5927\u578b\u8a9e\u8a00\u6a21\u578b\uff08LLM\uff09\u8a13\u7df4\u904e\u7a0b\u4e2d\uff0c\u5982\u4f55\u540c\u6642\u7dad\u6301\u5b78\u7fd2\u7a69\u5b9a\u6027\u8207\u63d0\u5347\u6548\u7387\u7684\u6311\u6230\u3002\u9019\u9805\u7814\u7a76\u7d50\u5408\u4e86\u50b3\u7d71\u4e0a\u5169\u7a2e\u4e3b\u8981\u7684\u6a21\u578b\u8a13\u7df4\u9014\u5f91\uff0c\u70baAI\u958b\u767c\u5e36\u4f86\u65b0\u7684\u65b9\u5411\u3002<\/p>\n<p>\u73fe\u884c\u5927\u578b\u8a9e\u8a00\u6a21\u578b\u7684\u8a13\u7df4\u4e3b\u8981\u6709\u5169\u7a2e\u7b56\u7565\u3002\u5176\u4e00\u662f\u300c\u7b56\u7565\u84b8\u993e\u300d\uff08on-polic
y distillation, OPD\uff09\uff0c\u900f\u904e\u4e00\u500b\u5927\u578b\u7684\u300c\u6559\u5e2b\u6a21\u578b\u300d\u5728\u6bcf\u500b\u6b65\u9a5f\u4e2d\uff0c\u70ba\u300c\u5b78\u751f\u6a21\u578b\u300d\u63d0\u4f9b\u5bc6\u96c6\u3001\u7d30\u7dfb\u5230\u8a5e\u5143\uff08token\uff09\u5c64\u7d1a\u7684\u6307\u5c0e\u3002\u53e6\u4e00\u7a2e\u662f\u300c\u53ef\u9a57\u8b49\u734e\u52f5\u5f37\u5316\u5b78\u7fd2\u300d\uff08reinforcement learning with verifiable rewards, RLVR\uff09\uff0c\u9019\u7a2e\u65b9\u6cd5\u5f9e\u74b0\u5883\u4e2d\u7372\u53d6\u7a00\u758f\u3001\u4e8c\u9032\u4f4d\u7684\u56de\u994b\uff08\u4f8b\u5982\u6b63\u78ba\u6216\u932f\u8aa4\u3001\u5df2\u89e3\u6c7a\u6216\u672a\u89e3\u6c7a\uff09\uff0c\u4e26\u8207\u771f\u5be6\u60c5\u6cc1\u9023\u7d50\u3002<\/p>\n<p>\u7136\u800c\uff0c\u7814\u7a76\u767c\u73fe\u7d14\u7cb9\u7684\u300c\u7b56\u7565\u81ea\u6211\u84b8\u993e\u300d\uff08on-policy self-distillation, OPSD\uff09\u2014\u2014\u5373\u6a21\u578b\u672c\u8eab\u540c\u6642\u626e\u6f14\u5b78\u751f\u8207\u6559\u5e2b\u89d2\u8272\u2014\u2014\u5b58\u5728\u7a69\u5b9a\u6027\u554f\u984c\u3002\u5118\u7ba1\u5728\u521d\u671f\u80fd\u8fc5\u901f\u6539\u5584\u6a21\u578b\u8868\u73fe\uff0c\u4f46\u5f88\u5feb\u5c31\u6703\u56e0\u70ba\u300c\u8cc7\u8a0a\u6d29\u6f0f\u300d\u5c0e\u81f4\u5b78\u7fd2\u5d29\u6f70\u3002\u9019\u662f\u7531\u65bc\u6559\u5e2b\u6a21\u578b\u77e5\u8b58\u6e90\u81ea\u5b78\u751f\u6a21\u578b\u81ea\u8eab\uff0c\u6703\u4e0d\u65b7\u5f37\u5316\u932f\u8aa4\u800c\u975e\u4fee\u6b63\uff0c\u9032\u800c\u7522\u751f\u4e0d\u7a69\u5b9a\u3001\u5bb9\u6613\u81ea\u6211\u6eff\u8db3\u4f46\u7121\u6cd5\u6cdb\u5316\u7684\u89e3\u6c7a\u65b9\u6848\u3002\u5728\u7d14\u7cb9\u7684OPSD\u4e2d\uff0c\u5b78\u751f\u8207\u6559\u5e2b\u6a21\u578b\u9593\u7684\u514b\u52de\u723e\u6563\u5ea6\uff08KL 
divergence\uff09\u8868\u73fe\u51fa\u4e0d\u7a69\u5b9a\u6027\uff0c\u986f\u793a\u6a21\u578b\u5728\u9707\u76ea\u4e2d\u5b78\u7fd2\u5230\u4e0d\u7a69\u56fa\u7684\u89e3\u65b9\u3002<\/p>\n<p>\u76f8\u8f03\u4e4b\u4e0b\uff0cRLVR\u96d6\u80fd\u63d0\u4f9b\u53ef\u9760\u7684\u65b9\u5411\u6027\u56de\u994b\uff0c\u4f46\u7576\u6b63\u78ba\u7d50\u679c\u51fa\u73fe\u983b\u7387\u4e0d\u9ad8\u6642\uff0c\u5b78\u7fd2\u9032\u5ea6\u6703\u986f\u5f97\u7de9\u6162\u3002\u800c\u81ea\u6211\u84b8\u993e\u96d6\u80fd\u63d0\u4f9b\u5bc6\u96c6\u7684\u5373\u6642\u56de\u994b\uff0c\u537b\u56e0\u7f3a\u4e4f\u5916\u90e8\u4e8b\u5be6\u7684\u4f9d\u64da\uff0c\u5bb9\u6613\u5f15\u767c\u8cc7\u8a0a\u6d29\u6f0f\u3002<\/p>\n<p>\u70ba\u89e3\u6c7a\u4e0a\u8ff0\u554f\u984c\uff0c\u7814\u7a76\u4eba\u54e1\u63d0\u51fa\u4e86RLSD\u7b56\u7565\u3002\u6b64\u65b9\u6cd5\u7cbe\u5de7\u5730\u5c07RLVR\u8207\u81ea\u6211\u84b8\u993e\u7684\u89d2\u8272\u5206\u958b\u3002RLSD\u5229\u7528RLVR\u63d0\u4f9b\u7a00\u758f\u4f46\u9328\u5b9a\u65bc\u771f\u5be6\u60c5\u6cc1\u7684\u65b9\u5411\u6027\u56de\u994b\uff0c\u540c\u6642\u904b\u7528\u81ea\u6211\u84b8\u993e\u4f86\u6821\u6e96\u7b56\u7565\u68af\u5ea6\u66f4\u65b0\u7684\u5e45\u5ea6\uff0c\u800c\u975e\u76f4\u63a5\u4f5c\u70ba\u5b78\u7fd2\u76ee\u6a19\u3002\u9019\u7a2e\u65b9\u5f0f\u900f\u904e\u8a08\u7b97\u5b78\u751f\u8207\u6559\u5e2b\u6a21\u578b\u5728\u8a5e\u5143\u5c64\u7d1a\u7684\u7b56\u7565\u5dee\u7570\uff0c\u5c07\u5b78\u7fd2\u91cd\u9ede\u653e\u5728\u5206\u6b67\u6700\u5927\u7684\u5340\u57df\u3002<\/p>\n<p>\u900f\u904e\u7d50\u5408\u5169\u8005\u512a\u52e2\uff0cRLSD\u7b56\u7565\u907f\u514d\u4e86\u8cc7\u8a0a\u6d29\u6f0f\u554f\u984c\uff0c\u56e0\u70ba\u5b78\u7fd2\u65b9\u5411\u4f86\u81ea\u74b0\u5883\uff0c\u800c\u5b78\u7fd2\u5e45\u5ea6\u7684\u6821\u6e96\u5247\u4f86\u81ea\u6a21\u578b\u672c\u8eab\u7684\u81ea\u6211\u6bd4\u8f03\u3002\u6839\u64da\u7814\u7a76\u7d50\u679c\u986f\u793a\uff0cRLSD\u4e0d\u50c5\u80fd\u7dad\u6301RLVR\u7684\u8a13\u7df4\u7a69\u5b9a\u6027\uff0c\u66f4\u80fd\u9054\u5230\u6bd4\u55ae\u7d14RLVR\u66f4\u9ad8\u7684
\u6536\u6582\u4e0a\u9650\u3002\u9019\u9805\u6df7\u5408\u5f0f\u65b9\u6cd5\u4f7f\u5b78\u7fd2\u80fd\u66f4\u96c6\u4e2d\u65bc\u6b63\u78ba\u8def\u5f91\u4e2d\u7684\u95dc\u9375\u6b65\u9a5f\uff0c\u4ee5\u53ca\u932f\u8aa4\u8def\u5f91\u4e2d\u5bb9\u6613\u51fa\u932f\u7684\u74b0\u7bc0\uff0c\u70baAI\u6a21\u578b\u8a13\u7df4\u5e36\u4f86\u66f4\u667a\u6167\u3001\u66f4\u9ad8\u6548\u7684\u89e3\u6c7a\u65b9\u6848\u3002<\/p>\n<p><img decoding=\"async\" src=\"https:\/\/h.sunmedia.tw\/hook\/05f8e341-0e40-474f-9f1f-ae3a44826eb0\" alt=\"\"><\/p>\n<div class=\"jeg_post_tags\"><span>\u6a19\u7c64\uff1a<\/span> \u5408\u4f5c\u5a92\u9ad4\u5546\u50b3\u5a92<\/div>\n<\/div>\n","protected":false},"excerpt":{"rendered":"<p>\u5716\uff0f\u672c\u5831AI\u88fd\u5716\uff08\u793a\u610f\u5716\uff09 \u5546\u50b3\u5a92\uff5c\u8449\u5b89\u5ead\uff0f\u7d9c\u5408\u5916\u96fb\u5831\u5c0e \u4eba\u5de5\u667a\u6167\u9818\u57df\u7814\u7a76\u4eba\u54e1\u8fd1\u65e5\u63d0\u51fa\u4e00\u9805\u540d\u70ba\u300c\u81ea\u6211\u84b8\u993e\u5f37\u5316\u5b78 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":3332,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[14],"tags":[],"class_list":{"0":"post-3331","1":"post","2":"type-post","3":"status-publish","4":"format-standard","5":"has-post-thumbnail","7":"category-14"},"_links":{"self":[{"href":"https:\/\/100bjy.com\/index.php\/wp-json\/wp\/v2\/posts\/3331","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/100bjy.com\/index.php\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/100bjy.com\/index.php\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/100bjy.com\/index.php\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/100bjy.com\/index.php\/wp-json\/wp\/v2\/comments?post=3331"}],"version-history":[{"count":0,"href":"https:\/\/100bjy.com\/index.php\/wp-json\/wp\/v2\/posts\/3331\/revisions"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/100bjy.com\/index.php\/wp-json\/wp\/v2\/media\/3332"}],"wp:attachment":[{"href":"https:\/\/100bjy.com\/index.php\/wp-json\/wp\/v2\/media?parent=3331"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/100bjy.com\/index.php\/wp-json\/wp\/v2\/categories?post=3331"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/100bjy.com\/index.php\/wp-json\/wp\/v2\/tags?post=3331"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}