Skip to content

Commit

Permalink
Added smartbugs dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
merendamattia committed Nov 5, 2024
1 parent 1e4ad64 commit 2eef85f
Show file tree
Hide file tree
Showing 312 changed files with 47,863 additions and 35 deletions.
166 changes: 134 additions & 32 deletions script-python/journal/compile-all.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import os
import subprocess
import json
import csv
from tqdm import tqdm

def clear_directory(directory):
"""
Expand All @@ -14,6 +16,78 @@ def clear_directory(directory):
except Exception as e:
print(f"Error deleting {file_path}: {e}")

def load_version_mapping(version_file):
    """
    Loads the compiler version mapping from a CSV file.

    The CSV must start with a header row that contains the columns
    'file' and 'compiled version'. When the same 'file' value appears on
    several rows, the last row wins.

    Args:
        version_file: Path of the CSV file to read.

    Returns:
        A dictionary where the key is the value of the 'file' column and the
        value is the matching 'compiled version' column.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If the header lacks the 'file' or 'compiled version' column.
    """
    # newline='' hands line-ending handling over to the csv module, as its
    # documentation requires; otherwise universal-newline translation rewrites
    # "\r\n" sequences that are embedded in quoted fields.
    with open(version_file, mode='r', newline='') as file:
        return {row['file']: row['compiled version'] for row in csv.DictReader(file)}

def compile_solidity_sources_with_different_version(source_dir, json_dir, version_file):
    """
    Compiles all .sol files in the specified source directory using different versions of solc,
    saving the bytecode for each file in JSON format in the specified output directory.

    The solc version for each file is looked up by file name in the mapping
    loaded from version_file. Versions are installed and selected through the
    `solc-select` command-line tool. Files without a mapped version are
    skipped with a message and are not counted in the final summary.

    Args:
        source_dir: Directory that is scanned (non-recursively) for .sol files.
        json_dir: Output directory; it is cleared and (re)created first.
        version_file: CSV file passed to load_version_mapping.

    Returns:
        None. A success/attempt summary is printed at the end.
    """
    version_mapping = load_version_mapping(version_file)
    installed_versions = set()  # Versions already installed during this run
    active_version = None  # Version last selected through `solc-select use`
    count_success = 0
    count_failure = 0

    # Clear and create JSON output directory
    clear_directory(json_dir)
    os.makedirs(json_dir, exist_ok=True)

    # List all .sol files in the source directory
    sol_files = [f for f in os.listdir(source_dir) if f.endswith('.sol')]

    # tqdm advances the bar once per iteration, including iterations that
    # leave early through `continue`.
    for filename in tqdm(sol_files, desc="Compiling files..."):
        # Full paths for input and output files
        input_file = os.path.join(source_dir, filename)
        output_file = os.path.join(json_dir, f"{os.path.splitext(filename)[0]}.json")

        compiled_version = version_mapping.get(filename)
        if compiled_version is None:
            print(f"Version not specified for {filename} in {version_file}. Skipping file.")
            continue

        # Install the version if not already installed.
        # Commands are passed as argument lists (no shell) so that paths or
        # CSV values containing spaces or shell metacharacters are treated
        # as plain data.
        if compiled_version not in installed_versions:
            try:
                subprocess.run(
                    ["solc-select", "install", compiled_version],
                    stdout=subprocess.DEVNULL,
                    check=True,
                )
            except (subprocess.CalledProcessError, OSError) as e:
                # OSError covers a missing `solc-select` executable, which
                # the shell used to report through a non-zero exit status.
                print(f"Error installing solc version {compiled_version}: {e}")
                continue
            installed_versions.add(compiled_version)  # Mark version as installed

        # Select the compiler (only when it differs from the active one) and
        # save the bytecode in JSON format.
        try:
            if compiled_version != active_version:
                subprocess.run(
                    ["solc-select", "use", compiled_version],
                    stdout=subprocess.DEVNULL,
                    check=True,
                )
                active_version = compiled_version

            # The output file is created before solc runs, so a failed
            # compilation leaves an empty .json file behind, exactly like the
            # shell redirection did.
            with open(output_file, 'w') as json_out:
                subprocess.run(
                    ["solc", "--combined-json", "bin", input_file],
                    stdout=json_out,
                    stderr=subprocess.DEVNULL,
                    check=True,
                )
        except (subprocess.CalledProcessError, OSError):
            count_failure += 1
        else:
            count_success += 1

    print(f"Compiled successfully {count_success}/{count_success + count_failure} files.")

def compile_solidity_sources(source_dir, json_dir):
"""
Compiles all .sol files in the specified source directory using solc,
Expand All @@ -31,7 +105,7 @@ def compile_solidity_sources(source_dir, json_dir):
output_file = os.path.join(json_dir, f"{os.path.splitext(filename)[0]}.json")

# Command to compile and save the bytecode in JSON format
command = f"solc --optimize-runs 0 --combined-json bin,abi,bin-runtime,asm,opcodes --pretty-json {input_file} > {output_file}"
command = f"solc --optimize-runs 0 --combined-json bin --pretty-json {input_file} > {output_file}"

# Execute the command
try:
Expand Down Expand Up @@ -95,41 +169,56 @@ def extract_and_save_bytecode(bytecode_dir, json_dir, is_ethersolve=False):
clear_directory(bytecode_dir)
os.makedirs(bytecode_dir, exist_ok=True)

for json_filename in os.listdir(json_dir):
if json_filename.endswith(".json"):
json_filepath = os.path.join(json_dir, json_filename)
with open(json_filepath, 'r') as json_file:
# List all .json files in the JSON directory
num_files = [f for f in os.listdir(json_dir) if f.endswith('.json')]

data = json.load(json_file)
contracts = data.get("contracts", {})
count = 1 # Sequential counter for each bytecode in the same JSON
# Progress bar setup
with tqdm(total=len(num_files), desc="Extracting files...") as pbar:
for json_filename in os.listdir(json_dir):
if json_filename.endswith(".json"):
json_filepath = os.path.join(json_dir, json_filename)
with open(json_filepath, 'r') as json_file:
# Check if the file is empty by reading the first character
if json_file.read(1) == "":
print(f"Skipping empty file: {json_filename}")
pbar.update(1)
continue
json_file.seek(0) # Reset the file pointer to the beginning

data = json.load(json_file)
contracts = data.get("contracts", {})
count = 1 # Sequential counter for each bytecode in the same JSON

for contract_name, contract_data in contracts.items():
bytecode = contract_data.get("bin")
if bytecode:
# Add a sequential number to the filename
bytecode_filename = os.path.join(
bytecode_dir, f"{os.path.splitext(json_filename)[0]}_{count}.bytecode"
)
with open(bytecode_filename, 'w') as bytecode_file:
# Find the first occurrence of '60806040'
first_index = bytecode.find('60806040')

# Find the second occurrence of '60806040' after the first
second_index = bytecode.find('60806040', first_index + len('60806040'))

if is_ethersolve:
second_index = first_index

if first_index != -1 and second_index != -1:
bytecode = bytecode[second_index:]

bytecode_file.write("0x" + bytecode)
print(f"Extracted bytecode to {bytecode_filename}")
count += 1 # Increment counter for next bytecode
for contract_name, contract_data in contracts.items():
bytecode = contract_data.get("bin")
if bytecode:
# Add a sequential number to the filename
bytecode_filename = os.path.join(
bytecode_dir, f"{os.path.splitext(json_filename)[0]}_{count}.bytecode"
)
with open(bytecode_filename, 'w') as bytecode_file:
# Find the first occurrence of '60806040'
first_index = bytecode.find('60806040')

# Find the second occurrence of '60806040' after the first
second_index = bytecode.find('60806040', first_index + len('60806040'))

if is_ethersolve:
second_index = first_index

if first_index != -1 and second_index != -1:
bytecode = bytecode[second_index:]

bytecode_file.write("0x" + bytecode)
# print(f"Extracted bytecode to {bytecode_filename}")
count += 1 # Increment counter for next bytecode
# Update the progress bar
pbar.update(1)

if __name__ == "__main__":

# SolidiFI dataset
"""
compile_solidity_sources('./reentrancy-solidifi/source-code',
'./reentrancy-solidifi/json')
compile_solidity_sources('./vanilla-solidifi/source-code',
Expand All @@ -150,6 +239,7 @@ def extract_and_save_bytecode(bytecode_dir, json_dir, is_ethersolve=False):
'./reentrancy-solidifi/json',
True)
"""
"""
# EVMLiSA
extract_and_save_bytecode('./vanilla-solidifi/bytecode/evmlisa',
Expand All @@ -164,4 +254,16 @@ def extract_and_save_bytecode(bytecode_dir, json_dir, is_ethersolve=False):
extract_and_save_bytecode('./reentrancy-solidifi/bytecode/ethersolve',
'./reentrancy-solidifi/json',
True)
"""
"""

# smartbugs dataset
#"""
compile_solidity_sources_with_different_version('./reentrancy-smartbugs/source-code',
'./reentrancy-smartbugs/json',
'./reentrancy-smartbugs/source-code/version.csv')
#"""
extract_and_save_bytecode('./reentrancy-smartbugs/bytecode/evmlisa',
'./reentrancy-smartbugs/json')
extract_and_save_bytecode('./reentrancy-smartbugs/bytecode/ethersolve',
'./reentrancy-smartbugs/json',
True)
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0x606060405233600160006101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908373ffffffffffffffffffffffffffffffffffffffff160217905550341561005057600080fd5b61021f8061005f6000396000f30060606040526004361061004c576000357c0100000000000000000000000000000000000000000000000000000000900463ffffffff168063a6f9dae114610051578063bd9b6d861461008a575b600080fd5b341561005c57600080fd5b610088600480803573ffffffffffffffffffffffffffffffffffffffff1690602001909190505061009f565b005b341561009557600080fd5b61009d610139565b005b3373ffffffffffffffffffffffffffffffffffffffff16600160009054906101000a900473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff16141561013657806000806101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908373ffffffffffffffffffffffffffffffffffffffff1602179055505b50565b6000809054906101000a900473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff163373ffffffffffffffffffffffffffffffffffffffff1614156101f1576000809054906101000a900473ffffffffffffffffffffffffffffffffffffffff16600160006101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908373ffffffffffffffffffffffffffffffffffffffff1602179055505b5600a165627a7a72305820ab207ed20b37aed224084a8b94a4854e1520f5fc867e36a31ddcc2930f9da8190029
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0x606060405233600160006101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908373ffffffffffffffffffffffffffffffffffffffff16021790555033600260006101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908373ffffffffffffffffffffffffffffffffffffffff160217905550341561009157600080fd5b6103d8806100a06000396000f300606060405260043610610057576000357c0100000000000000000000000000000000000000000000000000000000900463ffffffff168063a6f9dae11461005c578063bd9b6d8614610095578063c7de2d13146100aa575b600080fd5b341561006757600080fd5b610093600480803573ffffffffffffffffffffffffffffffffffffffff1690602001909190505061010b565b005b34156100a057600080fd5b6100a86101a5565b005b34156100b557600080fd5b610109600480803573ffffffffffffffffffffffffffffffffffffffff1690602001909190803590602001909190803573ffffffffffffffffffffffffffffffffffffffff1690602001909190505061025f565b005b3373ffffffffffffffffffffffffffffffffffffffff16600160009054906101000a900473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff1614156101a257806000806101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908373ffffffffffffffffffffffffffffffffffffffff1602179055505b50565b6000809054906101000a900473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff163373ffffffffffffffffffffffffffffffffffffffff16141561025d576000809054906101000a900473ffffffffffffffffffffffffffffffffffffffff16600160006101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908373ffffffffffffffffffffffffffffffffffffffff1602179055505b565b3373ffffffffffffffffffffffffffffffffffffffff16600160009054906101000a900473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff1614156103a7578273ffffffffffffffffffffffffffffffffffffffff1660405180807f7472616e7366657228616464726573732c75696e743235362900000000000000815250601901905060405180910390207c0100000000000000000000000000000000000000000000000000000000900482846040518363ffffffff167c0100000000000000000000000000000000000000000000
000000000000028152600401808373ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff1681526020018281526020019250505060006040518083038160008761646e5a03f192505050505b5050505600a165627a7a72305820ae1bbad7936862d72bb5fa0e7636ceaa42d50ab0ef00584e8d2733c46549b0720029
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0x606060405233600160006101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908373ffffffffffffffffffffffffffffffffffffffff16021790555033600260006101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908373ffffffffffffffffffffffffffffffffffffffff160217905550341561009157600080fd5b610832806100a06000396000f300606060405260043610610099576000357c0100000000000000000000000000000000000000000000000000000000900463ffffffff16806311bcd830146100a3578063290b1e5f146100cc5780632e2a51bd146100e1578063a6f9dae114610142578063bd9b6d861461017b578063c7de2d1314610190578063ed21248c146101f1578063eec0ddd7146101fb578063f8ff612e14610248575b6100a161027f565b005b34156100ae57600080fd5b6100b66102d9565b6040518082815260200191505060405180910390f35b34156100d757600080fd5b6100df6102df565b005b34156100ec57600080fd5b610140600480803573ffffffffffffffffffffffffffffffffffffffff1690602001909190803573ffffffffffffffffffffffffffffffffffffffff16906020019091908035906020019091905050610331565b005b341561014d57600080fd5b610179600480803573ffffffffffffffffffffffffffffffffffffffff16906020019091905050610426565b005b341561018657600080fd5b61018e6104c0565b005b341561019b57600080fd5b6101ef600480803573ffffffffffffffffffffffffffffffffffffffff1690602001909190803590602001909190803573ffffffffffffffffffffffffffffffffffffffff1690602001909190505061057a565b005b6101f961027f565b005b341561020657600080fd5b610232600480803573ffffffffffffffffffffffffffffffffffffffff169060200190919050506106c7565b6040518082815260200191505060405180910390f35b61027d600480803573ffffffffffffffffffffffffffffffffffffffff169060200190919080359060200190919050506106df565b005b6003543411156102d75734600460003373ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff168152602001908152602001600020600082825401925050819055505b565b60035481565b33600260006101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908373ffffffffffffffffffffffffffffffffffffffff160217905550670de0b6b3a7640000600381905550565b3373ffffffffffffffffffffffffffffffffffffff
ff16600160009054906101000a900473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff161415610421576000600460008573ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff168152602001908152602001600020541115610420576000600460008573ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff1681526020019081526020016000208190555061041f82828561057a565b5b5b505050565b3373ffffffffffffffffffffffffffffffffffffffff16600160009054906101000a900473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff1614156104bd57806000806101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908373ffffffffffffffffffffffffffffffffffffffff1602179055505b50565b6000809054906101000a900473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff163373ffffffffffffffffffffffffffffffffffffffff161415610578576000809054906101000a900473ffffffffffffffffffffffffffffffffffffffff16600160006101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908373ffffffffffffffffffffffffffffffffffffffff1602179055505b565b3373ffffffffffffffffffffffffffffffffffffffff16600160009054906101000a900473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff1614156106c2578273ffffffffffffffffffffffffffffffffffffffff1660405180807f7472616e7366657228616464726573732c75696e743235362900000000000000815250601901905060405180910390207c0100000000000000000000000000000000000000000000000000000000900482846040518363ffffffff167c0100000000000000000000000000000000000000000000000000000000028152600401808373ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff1681526020018281526020019250505060006040518083038160008761646e5a03f192505050505b505050565b60046020528060005260406000206000915090505481565b3373ffffffffffffffffffffffffffffffffffffffff16600160009054906101000a900473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff
161415610802576000600460008473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff168152602001908152602001600020541115610801578173ffffffffffffffffffffffffffffffffffffffff168160405160006040518083038185876187965a03f192505050156108005780600460008473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff168152602001908152602001600020600082825403925050819055505b5b5b50505600a165627a7a72305820956bfaf999d6dfbb815e943ca84885c081b185d110a33a90e56a46800506e3b20029
Loading

0 comments on commit 2eef85f

Please sign in to comment.