Skip to content

Commit

Permalink
re-generate data
Browse files Browse the repository at this point in the history
  • Loading branch information
Kaiyu Yang committed Mar 24, 2024
1 parent 78cee9d commit 6710e06
Showing 1 changed file with 46 additions and 47 deletions.
93 changes: 46 additions & 47 deletions scripts/generate-benchmark-lean4.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -252,8 +252,8 @@
" num_tactics += len(tactics)\n",
" data.append(\n",
" {\n",
" \"url\": thm.repo.url,\n",
" \"commit\": thm.repo.commit,\n",
" \"url\": traced_repo.repo.url,\n",
" \"commit\": traced_repo.repo.commit,\n",
" \"file_path\": _get_file_path(traced_repo, thm),\n",
" \"full_name\": thm.theorem.full_name,\n",
" \"start\": list(thm.start),\n",
Expand Down Expand Up @@ -361,26 +361,26 @@
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m2024-03-15 12:50:10.722\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mlean_dojo.data_extraction.trace\u001b[0m:\u001b[36mtrace\u001b[0m:\u001b[36m116\u001b[0m - \u001b[1mLoading the traced repo from /Users/yangky/.cache/lean_dojo/leanprover-community-mathlib4-3c307701fa7e9acbdc0680d7f3b9c9fed9081740/mathlib4\u001b[0m\n",
"2024-03-15 12:50:12,915\tINFO worker.py:1715 -- Started a local Ray instance. View the dashboard at \u001b[1m\u001b[32m127.0.0.1:8265 \u001b[39m\u001b[22m\n",
"100%|██████████| 5042/5042 [15:05<00:00, 5.57it/s] \n",
"\u001b[32m2024-03-15 13:05:29.599\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.lean\u001b[0m:\u001b[36m__post_init__\u001b[0m:\u001b[36m449\u001b[0m - \u001b[33m\u001b[1mLeanGitRepo(url='https://github.com/leanprover/lean4-cli', commit='a751d21d4b68c999accb6fc5d960538af26ad5ec') relies on an unsupported Lean version: ec941735c80dc54c53948e30c428905b6600f95a\u001b[0m\n",
"\u001b[32m2024-03-15 13:05:44.688\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.lean\u001b[0m:\u001b[36m__post_init__\u001b[0m:\u001b[36m449\u001b[0m - \u001b[33m\u001b[1mLeanGitRepo(url='https://github.com/leanprover/lean4-cli', commit='be8fa79a28b8b6897dce0713ef50e89c4a0f6ef5') relies on an unsupported Lean version: ec941735c80dc54c53948e30c428905b6600f95a\u001b[0m\n",
"\u001b[32m2024-03-15 13:05:54.625\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.lean\u001b[0m:\u001b[36m__post_init__\u001b[0m:\u001b[36m449\u001b[0m - \u001b[33m\u001b[1mLeanGitRepo(url='https://github.com/xubaiw/CMark.lean', commit='0077cbbaa92abf855fc1c0413e158ffd8195ec77') relies on an unsupported Lean version: 8fc1af650ad6d31cf766d9bc84119149330e7d4e\u001b[0m\n",
"\u001b[32m2024-03-24 18:10:33.150\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mlean_dojo.data_extraction.trace\u001b[0m:\u001b[36mtrace\u001b[0m:\u001b[36m116\u001b[0m - \u001b[1mLoading the traced repo from /home/kaiyu/.cache/lean_dojo/leanprover-community-mathlib4-3c307701fa7e9acbdc0680d7f3b9c9fed9081740/mathlib4\u001b[0m\n",
"2024-03-24 18:10:35,340\tINFO worker.py:1743 -- Started a local Ray instance. View the dashboard at \u001b[1m\u001b[32m127.0.0.1:8265 \u001b[39m\u001b[22m\n",
"100%|██████████| 5042/5042 [08:53<00:00, 9.45it/s] \n",
"\u001b[32m2024-03-24 18:19:41.232\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.lean\u001b[0m:\u001b[36m__post_init__\u001b[0m:\u001b[36m449\u001b[0m - \u001b[33m\u001b[1mLeanGitRepo(url='https://github.com/leanprover/lean4-cli', commit='a751d21d4b68c999accb6fc5d960538af26ad5ec') relies on an unsupported Lean version: ec941735c80dc54c53948e30c428905b6600f95a\u001b[0m\n",
"\u001b[32m2024-03-24 18:19:58.117\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.lean\u001b[0m:\u001b[36m__post_init__\u001b[0m:\u001b[36m449\u001b[0m - \u001b[33m\u001b[1mLeanGitRepo(url='https://github.com/leanprover/lean4-cli', commit='be8fa79a28b8b6897dce0713ef50e89c4a0f6ef5') relies on an unsupported Lean version: ec941735c80dc54c53948e30c428905b6600f95a\u001b[0m\n",
"\u001b[32m2024-03-24 18:20:02.541\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.lean\u001b[0m:\u001b[36m__post_init__\u001b[0m:\u001b[36m449\u001b[0m - \u001b[33m\u001b[1mLeanGitRepo(url='https://github.com/xubaiw/CMark.lean', commit='0077cbbaa92abf855fc1c0413e158ffd8195ec77') relies on an unsupported Lean version: 8fc1af650ad6d31cf766d9bc84119149330e7d4e\u001b[0m\n",
"Following Github server redirection from /repos/mhuisi/lean4-cli to /repositories/341363356\n",
"\u001b[32m2024-03-15 13:05:58.191\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.lean\u001b[0m:\u001b[36m__post_init__\u001b[0m:\u001b[36m449\u001b[0m - \u001b[33m\u001b[1mLeanGitRepo(url='https://github.com/mhuisi/lean4-cli', commit='39229f3630d734af7d9cfb5937ddc6b41d3aa6aa') relies on an unsupported Lean version: 216d2460e0adec8317fdeeb6f2543cb7442564fd\u001b[0m\n",
"\u001b[32m2024-03-15 13:06:05.265\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.lean\u001b[0m:\u001b[36m__post_init__\u001b[0m:\u001b[36m449\u001b[0m - \u001b[33m\u001b[1mLeanGitRepo(url='https://github.com/fgdorais/lean4-unicode-basic', commit='280d75fdfe7be8eb337be7f1bf8479b4aac09f71') relies on an unsupported Lean version: 0d7051497ea09b2b4a4ef608e371b8f317487c3c\u001b[0m\n",
"\u001b[32m2024-03-15 13:06:09.519\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.lean\u001b[0m:\u001b[36m__post_init__\u001b[0m:\u001b[36m449\u001b[0m - \u001b[33m\u001b[1mLeanGitRepo(url='https://github.com/hargonix/LeanInk', commit='2447df5cc6e48eb965c3c3fba87e46d353b5e9f1') relies on an unsupported Lean version: f6cd6c069587cfe62dd68cb6330f9ad794a56724\u001b[0m\n",
"\u001b[32m2024-03-15 13:08:20.957\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36msplit_data\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1m116695 theorems in total\u001b[0m\n",
"\u001b[32m2024-03-15 13:08:20.981\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36msplit_randomly\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mSplitting the theorems randomly\u001b[0m\n",
"\u001b[32m2024-03-15 13:08:21.017\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36msplit_by_premise\u001b[0m:\u001b[36m8\u001b[0m - \u001b[1mSplitting the theorems by premises\u001b[0m\n",
"\u001b[32m2024-03-15 13:10:10.961\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1m112695 theorems and 233473 tactics saved to ../leandojo_benchmark_4/random/train.json\u001b[0m\n",
"\u001b[32m2024-03-15 13:10:18.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1m2000 theorems and 4248 tactics saved to ../leandojo_benchmark_4/random/val.json\u001b[0m\n",
"\u001b[32m2024-03-15 13:10:21.899\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1m2000 theorems and 4567 tactics saved to ../leandojo_benchmark_4/random/test.json\u001b[0m\n",
"\u001b[32m2024-03-15 13:11:30.176\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1m112695 theorems and 231240 tactics saved to ../leandojo_benchmark_4/novel_premises/train.json\u001b[0m\n",
"\u001b[32m2024-03-15 13:11:36.065\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1m2000 theorems and 5357 tactics saved to ../leandojo_benchmark_4/novel_premises/val.json\u001b[0m\n",
"\u001b[32m2024-03-15 13:11:37.327\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m49\u001b[0m - \u001b[1m2000 theorems and 5691 tactics saved to ../leandojo_benchmark_4/novel_premises/test.json\u001b[0m\n",
"\u001b[32m2024-03-15 13:13:12.385\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_premises\u001b[0m:\u001b[36m73\u001b[0m - \u001b[1m168515 theorems/definitions from 5042 files saved to ../leandojo_benchmark_4/corpus.jsonl\u001b[0m\n"
"\u001b[32m2024-03-24 18:20:06.289\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.lean\u001b[0m:\u001b[36m__post_init__\u001b[0m:\u001b[36m449\u001b[0m - \u001b[33m\u001b[1mLeanGitRepo(url='https://github.com/mhuisi/lean4-cli', commit='39229f3630d734af7d9cfb5937ddc6b41d3aa6aa') relies on an unsupported Lean version: 216d2460e0adec8317fdeeb6f2543cb7442564fd\u001b[0m\n",
"\u001b[32m2024-03-24 18:20:13.771\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.lean\u001b[0m:\u001b[36m__post_init__\u001b[0m:\u001b[36m449\u001b[0m - \u001b[33m\u001b[1mLeanGitRepo(url='https://github.com/fgdorais/lean4-unicode-basic', commit='280d75fdfe7be8eb337be7f1bf8479b4aac09f71') relies on an unsupported Lean version: 0d7051497ea09b2b4a4ef608e371b8f317487c3c\u001b[0m\n",
"\u001b[32m2024-03-24 18:20:18.183\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mlean_dojo.data_extraction.lean\u001b[0m:\u001b[36m__post_init__\u001b[0m:\u001b[36m449\u001b[0m - \u001b[33m\u001b[1mLeanGitRepo(url='https://github.com/hargonix/LeanInk', commit='2447df5cc6e48eb965c3c3fba87e46d353b5e9f1') relies on an unsupported Lean version: f6cd6c069587cfe62dd68cb6330f9ad794a56724\u001b[0m\n",
"\u001b[32m2024-03-24 18:21:24.300\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36msplit_data\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1m116695 theorems in total\u001b[0m\n",
"\u001b[32m2024-03-24 18:21:24.301\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36msplit_randomly\u001b[0m:\u001b[36m18\u001b[0m - \u001b[1mSplitting the theorems randomly\u001b[0m\n",
"\u001b[32m2024-03-24 18:21:24.337\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36msplit_by_premise\u001b[0m:\u001b[36m8\u001b[0m - \u001b[1mSplitting the theorems by premises\u001b[0m\n",
"\u001b[32m2024-03-24 18:22:03.050\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1m112695 theorems and 234317 tactics saved to ../leandojo_benchmark_4/random/train.json\u001b[0m\n",
"\u001b[32m2024-03-24 18:22:03.850\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1m2000 theorems and 3988 tactics saved to ../leandojo_benchmark_4/random/val.json\u001b[0m\n",
"\u001b[32m2024-03-24 18:22:04.343\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1m2000 theorems and 3983 tactics saved to ../leandojo_benchmark_4/random/test.json\u001b[0m\n",
"\u001b[32m2024-03-24 18:22:28.950\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1m112695 theorems and 230581 tactics saved to ../leandojo_benchmark_4/novel_premises/train.json\u001b[0m\n",
"\u001b[32m2024-03-24 18:22:29.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1m2000 theorems and 5389 tactics saved to ../leandojo_benchmark_4/novel_premises/val.json\u001b[0m\n",
"\u001b[32m2024-03-24 18:22:30.535\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_proofs\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1m2000 theorems and 6318 tactics saved to ../leandojo_benchmark_4/novel_premises/test.json\u001b[0m\n",
"\u001b[32m2024-03-24 18:22:59.785\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mexport_premises\u001b[0m:\u001b[36m75\u001b[0m - \u001b[1m168515 theorems/definitions from 5042 files saved to ../leandojo_benchmark_4/corpus.jsonl\u001b[0m\n"
]
}
],
Expand Down Expand Up @@ -438,7 +438,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
" 5042\n"
"5042\n"
]
}
],
Expand Down Expand Up @@ -495,12 +495,11 @@
{
"data": {
"text/plain": [
"('Mathlib/RingTheory/Polynomial/Content.lean',\n",
" ['Mathlib/Data/Polynomial/FieldDivision.lean',\n",
" 'Mathlib/Data/Polynomial/EraseLead.lean',\n",
" '.lake/packages/lean4/src/lean/Init.lean',\n",
" 'Mathlib/Data/Polynomial/CancelLeads.lean',\n",
" 'Mathlib/Algebra/GCDMonoid/Finset.lean'])"
"('Mathlib/FieldTheory/Finiteness.lean',\n",
" ['Mathlib/LinearAlgebra/Dimension/Finite.lean',\n",
" 'Mathlib/LinearAlgebra/Dimension/Constructions.lean',\n",
" 'Mathlib/LinearAlgebra/Basis/VectorSpace.lean',\n",
" '.lake/packages/lean4/src/lean/Init.lean'])"
]
},
"execution_count": 10,
Expand All @@ -521,7 +520,7 @@
{
"data": {
"text/plain": [
"49"
"10"
]
},
"execution_count": 11,
Expand Down Expand Up @@ -550,10 +549,10 @@
{
"data": {
"text/plain": [
"{'full_name': 'Polynomial.IsPrimitive',\n",
" 'code': 'def IsPrimitive (p : R[X]) : Prop :=\\n ∀ r : R, C r ∣ p → IsUnit r',\n",
" 'start': [41, 1],\n",
" 'end': [43, 30],\n",
"{'full_name': 'IsNoetherian.iff_rank_lt_aleph0',\n",
" 'code': 'theorem iff_rank_lt_aleph0 : IsNoetherian K V ↔ Module.rank K V < ℵ₀',\n",
" 'start': [28, 1],\n",
" 'end': [42, 90],\n",
" 'kind': 'commanddeclaration'}"
]
},
Expand Down Expand Up @@ -645,8 +644,8 @@
"text/plain": [
"('https://github.com/leanprover-community/mathlib4',\n",
" '3c307701fa7e9acbdc0680d7f3b9c9fed9081740',\n",
" 'Mathlib/Algebra/GCDMonoid/Multiset.lean',\n",
" 'Multiset.lcm_add')"
" 'Mathlib/SetTheory/ZFC/Ordinal.lean',\n",
" 'ZFSet.IsTransitive.powerset')"
]
},
"execution_count": 15,
Expand Down Expand Up @@ -675,7 +674,7 @@
{
"data": {
"text/plain": [
"1"
"2"
]
},
"execution_count": 16,
Expand All @@ -697,24 +696,24 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 17,
"id": "a734d788",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'tactic': 'simp [lcm]',\n",
" 'annotated_tactic': ['simp [<a>lcm</a>]',\n",
" [{'full_name': 'Multiset.lcm',\n",
" 'def_path': 'Mathlib/Algebra/GCDMonoid/Multiset.lean',\n",
" 'def_pos': [39, 5],\n",
" 'def_end_pos': [39, 8]}]],\n",
" 'state_before': 'α : Type u_1\\ninst✝¹ : CancelCommMonoidWithZero α\\ninst✝ : NormalizedGCDMonoid α\\ns₁ s₂ : Multiset α\\n⊢ lcm (s₁ + s₂) = fold GCDMonoid.lcm (GCDMonoid.lcm 1 1) (s₁ + s₂)',\n",
" 'state_after': 'no goals'}"
"{'tactic': 'rw [mem_powerset] at hy ⊢',\n",
" 'annotated_tactic': ['rw [<a>mem_powerset</a>] at hy ⊢',\n",
" [{'full_name': 'ZFSet.mem_powerset',\n",
" 'def_path': 'Mathlib/SetTheory/ZFC/Basic.lean',\n",
" 'def_pos': [972, 9],\n",
" 'def_end_pos': [972, 21]}]],\n",
" 'state_before': 'x y✝ z✝ : ZFSet\\nh : IsTransitive x\\ny : ZFSet\\nhy : y ∈ powerset x\\nz : ZFSet\\nhz : z ∈ y\\n⊢ z ∈ powerset x',\n",
" 'state_after': 'x y✝ z✝ : ZFSet\\nh : IsTransitive x\\ny : ZFSet\\nhy : y ⊆ x\\nz : ZFSet\\nhz : z ∈ y\\n⊢ z ⊆ x'}"
]
},
"execution_count": 18,
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -756,7 +755,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
"version": "3.10.14"
}
},
"nbformat": 4,
Expand Down

0 comments on commit 6710e06

Please sign in to comment.