Skip to content

Instantly share code, notes, and snippets.

@itherunder
Last active April 21, 2022 02:20
Show Gist options
  • Save itherunder/5253e8dc7a0459f29c35ffa5dd5340d9 to your computer and use it in GitHub Desktop.
Save itherunder/5253e8dc7a0459f29c35ffa5dd5340d9 to your computer and use it in GitHub Desktop.
TSE的部分脚本代码
# Lower-case EVM opcode mnemonics, grouped by instruction family.
# Numbered families (push/dup/swap/log) are generated instead of spelled out.
OPCODES = [
    # Stop and Arithmetic
    'stop', 'add', 'mul', 'sub', 'div', 'sdiv', 'mod', 'smod',
    'addmod', 'mulmod', 'exp', 'signextend',
    # Comparison and Bitwise Logic
    'lt', 'gt', 'slt', 'sgt', 'eq', 'iszero', 'and', 'or', 'xor', 'not',
    'byte', 'shl', 'shr', 'sar',
    # Sha3
    'sha3',
    # Environment Information
    'address', 'balance', 'origin', 'caller', 'callvalue', 'calldataload',
    'calldatasize', 'calldatacopy', 'codesize', 'codecopy', 'gasprice',
    'extcodesize', 'extcodecopy', 'returndatasize', 'returndatacopy',
    'extcodehash',
    # These opcodes seem to belong in the environment 'block', but we are out
    # of opcode space in 0x3*
    'chainid', 'selfbalance',
    # Block Information
    'blockhash', 'coinbase', 'timestamp', 'number', 'difficulty', 'gaslimit',
    # Stack, Memory, Storage and Flow Operations
    'pop', 'mload', 'mstore', 'mstore8', 'sload', 'sstore', 'jump', 'jumpi',
    'pc', 'msize', 'gas', 'jumpdest',
    # Push Operations: push1 .. push32
    *['push%d' % width for width in range(1, 33)],
    # Duplicate Operations: dup1 .. dup16
    *['dup%d' % depth for depth in range(1, 17)],
    # Exchange Operations: swap1 .. swap16
    *['swap%d' % depth for depth in range(1, 17)],
    # Logging: log0 .. log4
    *['log%d' % topics for topics in range(5)],
    # System
    'create', 'call', 'callcode', 'return', 'delegatecall', 'create2',
    'staticcall', 'revert', 'selfdestruct',
]
'''
*: `内联汇编`
1. 多少个开源合约中有*
2. 每个有*的合约中有几段(多少个函数)中有*
3. 每一段*有多少行代码/多少字节码/哪些字节码
4. 是否优化
5. 有*的合约部署时间
6. 有*的合约的调用次数,其中*执行次数
7. 有多少次*执行失败,失败的原因
8. 为什么用*,是否可以用源码代替
9. *代码占自身合约的比例,指令频率比较
10. 不同合约*的相似性
11. 哪些Account创建了有*的合约(1.EOA or 2.SC)
12. 创建者身份推测
'''
import os, shutil, re
import subprocess, ujson
import execjs, sys, time
from opcode_value import *
from utils import *
# Raise the recursion limit (otherwise parsing the compiler's JSON crashes).
sys.setrecursionlimit(951116)
# JavaScript source used for compiling. Its `compile(version, contract,
# optimal, times)` function loads ./solc-bin-gh-pages/bin/soljson-<version>.js,
# compiles `contract` as the single source 'Task' with the optimizer set to
# (`optimal`, `times`), requests every output, and returns the parsed result.
cxt = execjs.compile(
'''
function compile(version, contract, optimal, times) {
var solc = require('solc')
var input = {
language: 'Solidity',
sources: {
'Task': {
content: contract
}
},
settings: {
optimizer: {
enabled: optimal,
runs: times
},
outputSelection: {
'*': {
'*': ['*']
}
}
}
};
solc = solc.setupMethods(require('./solc-bin-gh-pages/bin/soljson-' + version + '.js'))
var result = JSON.parse(solc.compile(JSON.stringify(input)))
return result
}
'''
)
# 1. 多少个开源合约中有*
def getContractsContainInline(contractspath):
    """Copy every contract containing inline assembly to ``../inlinecontracts``.

    Walks ``contractspath`` recursively, looks for ``assembly {`` in each
    file and copies matching files into the sibling directory
    ``inlinecontracts`` (created if missing).

    Args:
        contractspath: directory holding the contract source files.

    Returns:
        ``(totalCounter, inlineCounter)``: number of files scanned and number
        of files that contain at least one inline-assembly block.
    """
    pattern = re.compile(r'assembly\s*{')
    target = os.path.join(contractspath, '..', 'inlinecontracts')
    os.makedirs(target, exist_ok=True)
    totalCounter, inlineCounter = 0, 0
    for root, _, contracts in os.walk(contractspath):
        for contract in contracts:
            # Join with the directory actually being walked so that files in
            # sub-directories resolve correctly.
            source = os.path.join(root, contract)
            with open(source, 'r', encoding='utf8') as rf:
                code = rf.read()
            assemblies = pattern.findall(code)
            if assemblies:
                for assembly in assemblies:
                    # Report matches where the brace sits on a later line.
                    if '\n' in assembly:
                        print(len(assemblies), assembly)
                inlineCounter += 1
                shutil.copy(source, target)
                print('contract:', contract, 'totalCounter:', totalCounter, 'inlineCounter', inlineCounter)
            totalCounter += 1
    return totalCounter, inlineCounter
# 2. 每个有*的合约中有几段(多少个函数|修饰器)中有*
def getInlineCounterForEachContract(contractspath):
    """Count inline-assembly blocks, functions and modifiers over a directory.

    For every file in ``contractspath`` the source is scanned with regular
    expressions; a progress line is printed per file and a summary line at
    the end. The summary also shows the per-file numbers of the last file
    processed (zeros when the directory is empty).

    Args:
        contractspath: directory holding the contract source files.

    Returns:
        ``(totalInlineCounter, totalModifierCounter, totalFunctionCounter,
        totalFunctionInlineCounter)``.
    """
    inlinePattern = re.compile(r'assembly\s*?{')
    functionPattern = re.compile(r'function.*\(.*\)')
    declarationPattern = re.compile(r'function.*\(.*\);')
    modifierPattern = re.compile(r'modifier.*\{')
    functionInlinePattern = re.compile(r'(function|modifier)[\d\D]*?assembly\s*{')

    totalModifierCounter, totalFunctionCounter = 0, 0
    totalInlineCounter, totalFunctionInlineCounter = 0, 0
    # Pre-set the per-file values so the summary works for an empty directory.
    inlineCounter = modifierCounter = functionCounter = functionInlineCounter = 0
    contracts = os.listdir(contractspath)
    total = len(contracts)
    for counter, contract in enumerate(contracts, 1):
        with open(os.path.join(contractspath, contract), 'r', encoding='utf8') as rf:
            code = rf.read()
        inlineCounter = len(inlinePattern.findall(code))
        totalInlineCounter += inlineCounter
        # Functions in the contract, minus bodiless declarations.
        functionCounter = len(functionPattern.findall(code))
        functionCounter -= len(declarationPattern.findall(code))
        modifierCounter = len(modifierPattern.findall(code))
        totalFunctionCounter += functionCounter
        totalModifierCounter += modifierCounter
        # Functions/modifiers that are followed by an inline-assembly block.
        functionInlineCounter = len(functionInlinePattern.findall(code))
        totalFunctionInlineCounter += functionInlineCounter
        print('\rcontrct: %s %d/%i' % (contract, counter, total), end='')
    print('\ntotaInlineCounter:%d inlineCounter:%d totalModifierCounter:%d modifierCounter:%d totalFunctionCounter:%d functionCounter:%d totalFunctionInlineCounter:%d functionInlineCounter:%d' % (totalInlineCounter, inlineCounter, totalModifierCounter, modifierCounter, totalFunctionCounter, functionCounter, totalFunctionInlineCounter, functionInlineCounter))
    return totalInlineCounter, totalModifierCounter, totalFunctionCounter, totalFunctionInlineCounter
# 3. 每一段*有多少行代码,并将所有的* 放到一个文件中记录
def getInlineCode(inlinecontractspath, outputfile):
    """Append every inline-assembly block of every contract to ``outputfile``.

    For each file a header line ``<name>:`` is written, followed by each
    ``assembly { ... }`` block (brace-balanced, one trailing newline each).
    A block whose closing brace is never found is skipped.

    Args:
        inlinecontractspath: directory holding the contract source files.
        outputfile: text file the blocks are appended to.
    """
    pattern = re.compile(r'assembly\s*?{')
    contracts = os.listdir(inlinecontractspath)
    total = len(contracts)
    # Open the output once instead of once per header/block.
    with open(outputfile, 'a', encoding='utf-8') as af:
        for counter, contract in enumerate(contracts, 1):
            af.write(contract + ':\n')
            with open(os.path.join(inlinecontractspath, contract), 'r', encoding='utf8') as rf:
                code = rf.read()
            for assembly in pattern.finditer(code):
                start, index = assembly.span()
                depth = 1  # the opening brace matched by the pattern
                # Bounded scan: stop at end of text instead of indexing past it.
                while index < len(code) and depth:
                    if code[index] == '{':
                        depth += 1
                    elif code[index] == '}':
                        depth -= 1
                    index += 1
                if depth == 0:
                    af.write('%s\n' % code[start:index])
            print('\rcontract:%s %d/%d' % (contract, counter, total), end='')
def Log(logFile, message):
    """Print ``message`` and append it, newline-terminated, to ``logFile``."""
    print(message)
    with open(logFile, mode='a', encoding='utf8') as sink:
        entry = message + '\n'
        sink.write(entry)
def getInlineBytecode_(inlinecontractpath, addr2info, Rmod4):
    """Worker: compile a quarter of the contracts with marked assembly blocks.

    Every ``assembly { ... }`` block is wrapped between two marker stores,
    ``sstore(0x<n><n><n><n>, 0x19951116)`` at the start (``n`` is the 1-based
    block number) and ``sstore(0x7777, 0x19980101)`` at the end. The
    instrumented source is compiled through ``cxt``; the compiler output is
    saved and the text between the markers is extracted with a regular
    expression and appended to ``../data/inlinebytecode/<contract>``.

    Args:
        inlinecontractpath: directory with the contract sources (string
            concatenated with the file name, so it needs a trailing slash).
        addr2info: maps file name to ``(optimised, runs, compiler version)``.
        Rmod4: only files whose 1-based position modulo 4 equals this value
            are processed by this worker.
    """
    counter = 0
    for contract in os.listdir(inlinecontractpath):
        counter += 1
        if counter % 4 != Rmod4:
            continue
        message = '%s [INFO] contract: %s' % (time.strftime("%y-%m-%d %H:%M:%S", time.localtime()), contract)
        print(message)
        mutexWriteFile('../logs/compile.log', 'a', message)
        # Context manager so the source handle is always released.
        with open(inlinecontractpath + contract, 'r', encoding='utf-8') as rf:
            code = rf.read()
        proc_code, inline_codes, count = code, [], 1
        for assembly in re.finditer(r'assembly\s*?{', code):
            start, index = assembly.span()
            depth = 1  # the opening brace matched by the pattern
            # `count` identifies which inline-assembly block this is.
            proc_inline_code = 'assembly {\nsstore(0x%d%d%d%d, 0x19951116)\n' % (count, count, count, count)
            count += 1
            while index < len(code):
                char = code[index]
                if char == '}' and depth == 1:
                    # Closing brace of the block: put the end marker before it.
                    proc_inline_code += '\nsstore(0x7777, 0x19980101)\n}'
                else:
                    proc_inline_code += char
                if char == '{':
                    depth += 1
                elif char == '}':
                    depth -= 1
                    if depth == 0:
                        break
                index += 1
            else:
                # Ran off the end of the text: unterminated block, leave as is.
                continue
            inline_code = code[start:index + 1]
            inline_codes.append(proc_inline_code)
            # NOTE(review): replaces every textual occurrence of the block, so
            # identical blocks all receive the first block's marker -- confirm
            # this is intended.
            proc_code = proc_code.replace(inline_code, proc_inline_code)
        # version, contract, optimal, times
        opti, times, vers = addr2info[contract]
        try:
            res = cxt.call('compile', vers, proc_code, opti == 'Yes', int(times))
        except Exception as err:
            mutexWriteFile('../data/usefullists/error_list', 'a', '[ERROR] @%s\n%s\n' % (contract, str(err)))
            continue
        dumped = ujson.dumps(res)
        with open('../data/compileresults_inline/%s' % contract, 'w', encoding='utf8') as wf:
            wf.write(dumped)
        # Group 1: the repeated block number, group 2: text between the markers.
        bytecodes = set(re.findall(r'6319951116.{2}(\d+?)\1{3}55(.*?)6319980101', dumped))
        if bytecodes:
            with open('../data/inlinebytecode/%s' % contract, 'a', encoding='utf-8') as bf:
                for inline_code in inline_codes:
                    bf.write('%s\n' % inline_code)
                for bytecode in bytecodes:
                    bf.write('%s#%s\n' % (bytecode[0], bytecode[1]))
        elif 'errors' in res:
            mutexWriteFile('../data/usefullists/nocompile_list', 'a', '%s\n' % str(res['errors']))
# 4. 获得所有*的bytecode
def getInlineBytecode(inlinecontractpath, versionlist):
    """Extract the bytecode of every inline-assembly block using 4 processes.

    Args:
        inlinecontractpath: directory with the contract sources.
        versionlist: text file with one ``addr#optimised#runs#version`` record
            per line; blank lines are ignored.
    """
    # Compiler version, optimisation flag and optimisation runs per contract.
    addr2info = {}
    with open(versionlist, 'r', encoding='utf-8') as vf:
        for line in vf:
            line = line.strip()
            if not line:
                continue
            addr, opti, times, vers = line.split('#')
            addr2info[addr] = (opti, times, vers)
    # Compilation is very slow, so fan out over several processes.
    # `Pool` is expected to arrive through `from utils import *`.
    pool = Pool(4)
    for i in range(4):
        # error_callback surfaces worker exceptions that apply_async would
        # otherwise drop silently.
        pool.apply_async(getInlineBytecode_, args=(inlinecontractpath, addr2info, i), error_callback=print)
    pool.close()
    pool.join()
# 4. 从获取得到的inline_code.txt 中把opcode 和function 拿出做统计
def getInlineOpcode(inlinecodepath):
    """Print a ``|opcode|count|`` table of opcode usage in the collected code.

    Every ``name(`` token in the file is tallied, either as an opcode (when
    the name is in ``OPCODES``) or as another function call. Only the opcode
    tally is printed, highest count first.
    """
    opcode_counter, func_counter = {}, {}
    with open(inlinecodepath, 'r', encoding='utf-8') as rf:
        code = rf.read()
    for token in re.findall(r'\w+\s?\(', code):
        # Drop the trailing '(' and any whitespace in front of it.
        name = token[:-1].strip()
        # Anything that is not an opcode is presumably a user-defined function.
        tally = opcode_counter if name in OPCODES else func_counter
        tally[name] = tally.get(name, 0) + 1
    ranking = sorted(([hits, name] for name, hits in opcode_counter.items()), reverse=True)
    for hits, name in ranking:
        print('|%s|%d|' % (name, hits))
# 5. *的优化情况
def isOptimal(contractspath, versionlist):
    """Count how many contracts were compiled with and without optimisation.

    Args:
        contractspath: directory whose file names are looked up in the list.
        versionlist: text file with one ``addr#optimised#runs#version`` record
            per line; blank lines are ignored.

    Returns:
        ``(optimalCounter, notOptiCounter)``.

    Raises:
        KeyError: a file in ``contractspath`` has no record in ``versionlist``.
    """
    # Compiler version, optimisation flag and optimisation runs per contract.
    addr2info = {}
    with open(versionlist, 'r', encoding='utf-8') as vf:
        for line in vf:
            line = line.strip()
            if not line:
                continue
            addr, opti, times, vers = line.split('#')
            addr2info[addr] = (opti, times, vers)
    optimalCounter, notOptiCounter = 0, 0
    for contract in os.listdir(contractspath):
        flag = addr2info[contract][0]
        optimalCounter += flag == 'Yes'
        notOptiCounter += flag == 'No'
        print('%s: %d %d' % (contract, optimalCounter, notOptiCounter))
    return optimalCounter, notOptiCounter
# 编译合约
def getCompileResult_(contractspath, addr2info, Rmod4, resultspath):
    """Worker: compile a quarter of the contracts and store the JSON output.

    Contracts whose result file already exists are skipped. Compiler
    exceptions are recorded in ``../data/usefullists/error_list`` and the
    contract is skipped.

    Args:
        contractspath: directory with the contract sources (string
            concatenated with the file name, so it needs a trailing slash).
        addr2info: maps file name to ``(optimised, runs, compiler version)``.
        Rmod4: only files whose 1-based position modulo 4 equals this value
            are processed by this worker.
        resultspath: directory the compiler output is written to (also string
            concatenated).
    """
    counter = 0
    for contract in os.listdir(contractspath):
        counter += 1
        if counter % 4 != Rmod4:
            continue
        stamp = time.strftime("%y-%m-%d %H:%M:%S", time.localtime())
        if os.path.exists('%s%s' % (resultspath, contract)):
            print('%s [INFO] contract: %s has existed!' % (stamp, contract))
            mutexWriteFile('../logs/compile.log', 'a', '%s [INFO] contract: %s has existed!' % (stamp, contract))
            continue
        print('%s [INFO] contract: %s' % (stamp, contract))
        mutexWriteFile('../logs/compile.log', 'a', '%s [INFO] contract: %s' % (stamp, contract))
        # Context manager: the handle is released even when compilation fails
        # and the loop continues.
        with open(contractspath + contract, 'r', encoding='utf8') as rf:
            code = rf.read()
        # version, contract, optimal, times
        opti, times, vers = addr2info[contract]
        try:
            res = cxt.call('compile', vers, code, opti == 'Yes', int(times))
        except Exception as err:
            mutexWriteFile('../data/usefullists/error_list', 'a', '[ERROR] @%s\n%s\n' % (contract, str(err)))
            continue
        with open(resultspath + contract, 'w', encoding='utf8') as wf:
            wf.write(ujson.dumps(res))
# 获取编译所有的结果
def getCompileResult(contractspath, versionlist, resultspath):
    """Compile every contract in ``contractspath`` using 4 processes.

    Args:
        contractspath: directory with the contract sources.
        versionlist: text file with one ``addr#optimised#runs#version`` record
            per line; blank lines are ignored.
        resultspath: directory for the compiler output (created if missing,
            including parent directories).
    """
    os.makedirs(resultspath, exist_ok=True)
    # Compiler version, optimisation flag and optimisation runs per contract.
    addr2info = {}
    with open(versionlist, 'r', encoding='utf-8') as vf:
        for line in vf:
            line = line.strip()
            if not line:
                continue
            addr, opti, times, vers = line.split('#')
            addr2info[addr] = (opti, times, vers)
    # Compilation is very slow, so fan out over several processes.
    # `Pool` is expected to arrive through `from utils import *`.
    pool = Pool(4)
    for i in range(4):
        # error_callback surfaces worker exceptions that apply_async would
        # otherwise drop silently.
        pool.apply_async(getCompileResult_, args=(contractspath, addr2info, i, resultspath), error_callback=print)
    pool.close()
    pool.join()
# 根据ABI获取所有函数个数
def getFunctionNumber(contractpagespath):
    """Print the name of every entry in ``contractpagespath``, one per line.

    Placeholder: despite its name it only lists the directory so far.
    """
    entries = os.listdir(contractpagespath)
    for entry in entries:
        print(entry)
# 拿编译时错误的合约(合约编写问题
def getCompileError(compileresultspath, outputpath, contractspath='../data/contracts_total/'):
    """Collect contracts whose compiler output has no ``sources`` entry.

    A missing or empty ``sources`` entry is taken to mean the contract itself
    failed to compile; its source is copied to ``outputpath`` and its name is
    printed.

    Args:
        compileresultspath: directory with the stored compiler JSON output.
        outputpath: directory the failing sources are copied to (created if
            missing).
        contractspath: directory the sources are copied from.
    """
    os.makedirs(outputpath, exist_ok=True)
    for result in os.listdir(compileresultspath):
        with open(os.path.join(compileresultspath, result), 'r', encoding='utf8') as rf:
            res = ujson.loads(rf.read())
        # .get(): an output without the key at all counts as empty too.
        if not res.get('sources'):
            shutil.copy(os.path.join(contractspath, result), os.path.join(outputpath, result))
            print(result)
# 拿编译器错误的合约(调用编译器出错
def getCompilerError(compileresultspath, contractsfile, outputpath, contractspath='../data/contracts_total/'):
    """Collect contracts for which no compiler output exists at all.

    Every name listed in ``contractsfile`` that has no file in
    ``compileresultspath`` is copied from ``contractspath`` to ``outputpath``.

    Args:
        compileresultspath: directory with the stored compiler output.
        contractsfile: text file with one contract name per line; blank lines
            are ignored.
        outputpath: directory the sources are copied to (created if missing).
        contractspath: directory the sources are copied from.
    """
    os.makedirs(outputpath, exist_ok=True)
    results = set(os.listdir(compileresultspath))
    with open(contractsfile, 'r', encoding='utf8') as rf:
        for line in rf:
            contract = line.strip()
            # A blank line would otherwise try to copy the directory itself.
            if not contract or contract in results:
                continue
            shutil.copy(os.path.join(contractspath, contract), os.path.join(outputpath, contract))
# 通过pages里面的abi获取函数个数
def getInlineFunctionByABI(inlinecontractspath, contractpagespath):
    """Print the ABI entries stored in a contract's saved page (exploratory).

    The page file is JSON whose ``result[0]['ABI']`` field is itself a
    JSON-encoded list.

    Args:
        inlinecontractspath: directory whose file names select the contracts.
        contractpagespath: directory with the saved pages (string concatenated
            with the file name, so it needs a trailing slash).
    """
    for contract in os.listdir(inlinecontractspath):
        print(contract)
        # Context manager so the page handle is always released.
        with open(contractpagespath + contract, 'r', encoding='utf8') as rf:
            abi = ujson.loads(ujson.loads(rf.read())['result'][0]['ABI'])
        for entry in abi:
            # print(entry['type'], entry['name'])
            print(entry)
        # NOTE(review): the original nesting of this `break` was lost; it is
        # read here as "stop after the first contract" -- confirm.
        break
# 通过bytecode获取opcode
def getOpcode(bytecodespath, outputpath):
    """Disassemble every bytecode file with ``evm disasm``.

    The standard output of ``evm disasm <file>`` is written to a file of the
    same name in ``outputpath``. The command is run without a shell, so file
    names are never interpreted as shell syntax.

    Args:
        bytecodespath: directory with the bytecode files.
        outputpath: directory for the disassembly (created if missing).
    """
    os.makedirs(outputpath, exist_ok=True)
    for contract in os.listdir(bytecodespath):
        print('contract: %s' % contract)
        source = os.path.join(bytecodespath, contract)
        with open(os.path.join(outputpath, contract), 'wb') as out:
            try:
                subprocess.run(['evm', 'disasm', source], stdout=out)
            except OSError as err:
                # e.g. `evm` is not installed: report it and keep going, as
                # the shell-based version did.
                print('[ERROR] cannot run evm for %s: %s' % (contract, err))
def main():
    """Run one analysis step.

    Each step is enabled by uncommenting its call; only the opcode
    disassembly step at the bottom is currently active.
    """
    # init()
    # getContractsContainInline('contracts/')
    # getInlineBytecode('../data/contracts_inline/', '../data/usefullists/full_info_list')
    # getInlineCode('../data/contracts_inline/', '../data/inline_code.txt')
    # isOptimal('inlinecontracts/', 'lists/full_info_list')
    # getOpcode('inline_code.txt')
    # getCompileResult('../data/contracts_total/', '../data/usefullists/full_info_list', '../data/compileresults_total/')
    # getCompileError('../data/compileresults_total/', '../data/contracts_total_compileerror/')
    # getCompilerError('../data/compileresults_total/', '../data/contracts_total.txt', '../data/contracts_total_compilererror/')
    # getCompileError('../data/compileresults_inline/', '../data/contracts_inline_compileerror/')
    # getCompilerError('../data/compileresults_inline/', '../data/contracts_inline.txt', '../data/contracts_inline_compilererror/')
    # getCompileResult('../data/contracts_compileerror/', '../data/usefullists/full_info_list', '../data/compileresults_compileerror/')
    # getInlineFunctionByABI('../data/contracts_inline/', '../data/contract_pages_total/')
    # getInlineCounterForEachContract('../data/contracts_inline/')
    # getInlineCounterForEachContract('../data/contracts_total/')
    # deleteSpaceLine('../data/contracts_total/')
    # deleteSpaceLine('../data/contracts_inline/')
    # getTotalCodeLine('../data/contracts_total/')
    # getTotalCodeLine('../data/contracts_inline/')
    getOpcode('../data/bytecodes_total/', '../data/opcodes_total/')


# Run the selected step only when executed as a script.
if __name__ == "__main__":
    main()
# 6319951116.{2}(.*?){4}(.*?)(?:617777f3|f3)
git add *
git commit -m "提交一些东西0323"
git push
import os, execjs, shutil, re, json, sys
from bs4 import BeautifulSoup
from multiprocessing import Pool, Lock
__author__ = 'Zhou.Liao'
# Compiler versions that cannot be used.
ERRORSOLC = [
    'v0.3.4-nightly.2016.6.6+commit.e97ac4f.js',
    'v0.3.4-nightly.2016.6.8+commit.93790d.js',
    'v0.3.4-nightly.2016.6.8+commit.ccddd6f.js',
    'v0.3.4-nightly.2016.6.8+commit.d593166.js',
    'v0.3.6-nightly.2016.8.27+commit.91d4fa4.js',
    'v0.3.6-nightly.2016.8.29+commit.b8060c5.js',
    'v0.3.6-nightly.2016.8.30+commit.cf974fd.js',
    'v0.3.6-nightly.2016.8.31+commit.3ccd198.js',
    'v0.3.6-nightly.2016.9.1+commit.b5d941d.js',
    'v0.3.6-nightly.2016.9.2+commit.341c943.js',
    'v0.3.6-nightly.2016.9.5+commit.873d8bb.js',
    'v0.3.6-nightly.2016.9.6+commit.114502f.js',
    'v0.3.6-nightly.2016.9.7+commit.24524d6.js',
    'v0.3.6-nightly.2016.9.8+commit.f5a513a.js',
    'v0.4.1-nightly.2016.9.9+commit.79867f4.js'
]
# JavaScript helper used by testRequire(): its `compile(compiler)` function
# only tries to load ./solc-bin-gh-pages/bin/<compiler> through solc, so a
# compiler build that cannot be loaded makes the call raise.
testCxt = execjs.compile(
'''
function compile(compiler) {
var solc = require('solc')
solc = solc.setupMethods(require('./solc-bin-gh-pages/bin/' + compiler))
}
'''
)
# Module-wide lock shared by every function decorated with mutexExec.
mutex = Lock()


# Function decorator: the decorated function must run mutually exclusively.
def mutexExec(func):
    """Return ``func`` wrapped so that it only runs while holding ``mutex``.

    The lock is released even when ``func`` raises, and the wrapper hands
    back whatever ``func`` returns.
    """
    import functools

    @functools.wraps(func)
    def mutexFunc(*args, **kwargs):
        # `with` guarantees release on exceptions; a bare acquire/release
        # pair would leave the lock held and block every later caller.
        with mutex:
            return func(*args, **kwargs)
    return mutexFunc
# Write to a file under the shared mutex.
@mutexExec
def mutexWriteFile(file, type, message):
    """Write ``message`` to ``file`` opened in mode ``type`` (UTF-8).

    Special case: when the file is literally named 'sleep', it is deleted
    once it has grown beyond 951116 lines.
    """
    with open(file, type, encoding='utf8') as sink:
        sink.write(message)
    if file != 'sleep':
        return
    with open(file, 'r', encoding='utf8') as source:
        oversized = len(source.readlines()) > 951116
    if oversized:
        os.remove(file)
# -5. 格式化版本
def formatVersion(versionfile, bindir='D:/solc-bin-gh-pages/bin/'):
    """Rewrite ``versionfile`` so every version names an available compiler.

    Records are ``contract#optimised#runs#version``. A record is kept as is
    when ``soljson-<version>.js`` exists in ``bindir``. Otherwise the first
    compiler file whose name contains both the edition prefix and the commit
    suffix of the version is substituted. Records with no matching compiler
    (or a version too short to split) are dropped. Blank lines are ignored.

    NOTE(review): a second, zero-argument ``formatVersion`` is defined later
    in this file and replaces this one at import time.

    Args:
        versionfile: path of the record file, rewritten in place.
        bindir: directory holding the ``soljson-*.js`` compiler builds.
    """
    versions, counter = [], 0
    allversions = os.listdir(bindir)
    available = set(allversions)
    with open(versionfile, 'r', encoding='utf-8') as rf:
        for line in rf:
            if not line.strip():
                continue
            contract, optimal, times, version = line.strip().split('#')
            if 'soljson-' + version + '.js' in available:
                versions.append(line)
                continue
            counter += 1
            print(version, counter)
            # The slicing below needs at least 8 characters.
            if len(version) < 8:
                continue
            # Edition is the leading 'vX.Y.Z' (6 or 7 characters).
            edition = version[:6] if version[6] in ['+', '-'] else version[:7]
            # Commit hash is the trailing 7 or 8 characters.
            commit = version[-7:] if version[-8] in ['.', '-'] else version[-8:]
            for allversion in allversions:
                if edition in allversion and commit in allversion:
                    # Strip the 'soljson-' prefix and the '.js' suffix.
                    versions.append(contract + '#' + optimal + '#' + times + '#' + allversion[8:-3] + '\n')
                    break
    with open(versionfile, 'w', encoding='utf-8') as wf:
        wf.writelines(versions)
# -4. 编译失败的合约
def getFailed(contractspath, astspath):
    """Append to ``diff.txt`` the contracts that have no AST file.

    Files are compared by the part of their name before the first dot. Both
    directories are walked recursively. ``diff.txt`` (in the current working
    directory) is only touched when something is missing.

    Args:
        contractspath: directory with the contract sources.
        astspath: directory with the generated AST files.
    """
    asts = set()
    for _, _, astFiles in os.walk(astspath):
        asts.update(ast.split('.')[0] for ast in astFiles)
    missing = []
    for _, _, inlineContractFiles in os.walk(contractspath):
        for inlineContract in inlineContractFiles:
            name = inlineContract.split('.')[0]
            if name not in asts:
                missing.append(name)
    if missing:
        # One open for all names instead of one per name.
        with open('diff.txt', 'a', encoding='utf-8') as af:
            af.writelines(name + '\n' for name in missing)
# -3. 删除编译头试试看
def deletePragma(contractspath):
    """Remove every line containing 'pragma solidity' from each file, in place.

    Walks ``contractspath`` recursively and prints each file name.
    """
    for folder, _, names in os.walk(contractspath):
        for name in names:
            print(name)
            target = '%s/%s' % (folder, name)
            with open(target, 'r', encoding='utf8') as rf:
                kept = [row for row in rf if 'pragma solidity' not in row]
            with open(target, 'w', encoding='utf8') as wf:
                wf.write(''.join(kept))
# -2. 将转义符替换成有用字符
def replaceTran(contractspath):
    """Replace escaped sequences in every file, in place.

    Each key of ``TRANSFERWORD`` found in a file is replaced by its mapped
    value. Walks ``contractspath`` recursively and prints each file name.
    """
    for folder, _, names in os.walk(contractspath):
        for name in names:
            print(name)
            target = '%s/%s' % (folder, name)
            with open(target, 'r', encoding='utf8') as rf:
                code = rf.read()
            for escaped, plain in TRANSFERWORD.items():
                code = code.replace(escaped, plain)
            with open(target, 'w', encoding='utf8') as wf:
                wf.write(code)
# -1. 删除空白行
def deleteSpaceLine(contractspath):
    """Remove blank (whitespace-only) lines from every file, in place.

    Walks ``contractspath`` recursively, showing the current file name on a
    single self-overwriting progress line.
    """
    for folder, _, names in os.walk(contractspath):
        for name in names:
            print('\r%s' % name, end='')
            target = '%s/%s' % (folder, name)
            with open(target, 'r', encoding='utf8') as rf:
                kept = [row for row in rf if row.strip()]
            with open(target, 'w', encoding='utf8') as wf:
                wf.write(''.join(kept))
# 0. 删除注释
def deleteComments(contractspath):
    """Write a comment-free copy of every contract.

    Line comments (``// ...``) and block comments (``/* ... */``) are
    removed; text inside single-line string literals is left untouched so
    that, for example, URLs containing ``//`` survive. The copies go to the
    sibling directory ``without_comments_contracts`` (created if missing)
    under the original file name.

    Args:
        contractspath: directory with the contract sources, walked recursively.
    """
    pattern = re.compile(
        r'("(?:\\.|[^"\\\n])*"'      # double-quoted string literal (kept)
        r"|'(?:\\.|[^'\\\n])*')"     # single-quoted string literal (kept)
        r'|//[^\n]*'                 # line comment, possibly empty
        r'|/\*.*?\*/',               # block comment, possibly empty
        re.DOTALL)
    target = os.path.join(contractspath, '..', 'without_comments_contracts')
    os.makedirs(target, exist_ok=True)
    for root, _, contracts in os.walk(contractspath):
        for contract in contracts:
            print('contract:', contract)
            with open(os.path.join(root, contract), 'r', encoding='utf8') as rf:
                code = rf.read()
            # Group 1 is only set for string literals, which are put back.
            code_without_comments = pattern.sub(lambda m: m.group(1) or '', code)
            with open(os.path.join(target, contract), 'w', encoding='utf8') as wf:
                wf.write(code_without_comments)
# 获取合约代码,编译版本,是否优化及优化次数
def getInfo(page_path):
    """Extract source code and compiler settings from one saved contract page.

    The source is written to ``contracts/<address>`` and a record
    ``address#optimised#runs#version`` is appended to
    ``lists/full_info_list``.

    Args:
        page_path: path of the saved HTML page.
    """
    print(page_path)
    # Context managers so none of the three handles is left open.
    with open(page_path, 'r', encoding='utf-8') as rf:
        soup = BeautifulSoup(rf.read(), 'html.parser')
    addr = soup.find(id='mainaddress').text.strip().lower()
    code = soup.find(id='editor').text
    # Settings are read from fixed table/cell positions in the page layout.
    tabs = soup.find_all(class_='table')
    vers = tabs[4].find_all('td')[3].text.strip()
    optm = tabs[5].find_all('td')[1].text.strip()
    runs = tabs[5].find_all('td')[3].text.strip()
    with open('contracts/%s' % addr, 'w', encoding='utf-8') as cf:
        cf.write(code)
    with open('lists/full_info_list', 'a', encoding='utf-8') as df:
        df.write('%s#%s#%s#%s\n' % (addr, optm, runs, vers))
def getAllInfo(start):
    """Run getInfo on every page under ``pages/`` numbered ``start`` or higher.

    The page number is the part of the file name before the first dot.
    """
    for folder in os.listdir('pages/'):
        for page in os.listdir('pages/%s' % folder):
            if int(page.split('.')[0]) >= start:
                getInfo('pages/%s/%s' % (folder, page))
# 获取合约调用次数
def getCallTimes(page_path):
    """Extract the transaction count from every saved contract page.

    The count is taken from the "view full list" link when the page says
    'from a total of', otherwise from a 'Latest N txn' phrase, otherwise it
    is 0. One ``page#count`` line per page is appended to ``calltimes.txt``
    in the current working directory.

    Args:
        page_path: directory with the saved HTML pages.

    Returns:
        The sum of all counts.
    """
    totalTimes = 0
    for page in os.listdir(page_path):
        print(page, end=': ')
        with open(os.path.join(page_path, page), 'r', encoding='utf8') as rf:
            html = rf.read()
        found = []
        if 'from a total of' in html:
            found = re.findall(r"title='Click to view full list'>(.*?)</a>", html)
        if not found:
            found = re.findall(r'Latest (.*?) txn', html)
        callTimes = 0
        if found:
            try:
                # Counts may be written with thousands separators.
                callTimes = int(found[0].replace(',', ''))
            except ValueError:
                callTimes = 0
        totalTimes += callTimes
        with open('calltimes.txt', 'a', encoding='utf8') as af:
            af.write('%s#%d\n' % (page, callTimes))
        print('call %d times! total times: %d' % (callTimes, totalTimes))
    return totalTimes
def testRequire():
    """Try to load every compiler build and set aside the ones that fail.

    Each file in ./solc-bin-gh-pages/bin/ is loaded through ``testCxt``; a
    build that raises is copied one directory up.
    """
    compilers = os.listdir('./solc-bin-gh-pages/bin/')
    for compiler in compilers:
        print(compiler)
        try:
            testCxt.call('compile', compiler)
        except Exception:
            broken = './solc-bin-gh-pages/bin/%s' % compiler
            shutil.copy(broken, './solc-bin-gh-pages/%s' % compiler)
def formatVersion(versionfile='../data/usefullists/full_info_list.bak',
                  outputfile='../data/usefullists/full_info_list',
                  bindir='./solc-bin-gh-pages/bin/'):
    """Map every recorded compiler version onto an available compiler build.

    Records are ``...#version`` lines. A record is copied unchanged when
    ``soljson-<version>.js`` exists in ``bindir``. Otherwise the version is
    replaced by the first build whose file name contains both the edition
    (text between 'v' and the first '-') and four characters of the commit
    hash. Records for which no build is found are dropped.

    Args:
        versionfile: input record file.
        outputfile: file the normalised records are written to.
        bindir: directory holding the ``soljson-*.js`` compiler builds.
    """
    true_versions = os.listdir(bindir)
    available = set(true_versions)
    infos = []
    with open(versionfile, 'r', encoding='utf-8') as vf:
        for count, line in enumerate(vf):
            print(count)
            vers = line.strip().split('#')[-1]
            if 'soljson-%s.js' % vers in available:
                infos.append(line)
                continue
            # The recorded version has no build under that exact name.
            editions = re.findall(r'v(.*?)-', vers)
            if not editions:
                # No 'v...-' part to match on; previously an IndexError.
                continue
            edition = editions[0]
            commit = vers.split('-')[-1]
            for true_version in true_versions:
                if edition in true_version and commit[-5:-1] in true_version:
                    # Strip the 'soljson-' prefix and the '.js' suffix.
                    infos.append(line.replace(vers, true_version[8:-3]))
                    # Stop at the first hit so a contract is written once.
                    break
    with open(outputfile, 'w', encoding='utf8') as wf:
        wf.writelines(infos)
def putContractsList(contractspath):
    """Append the name of every entry in ``contractspath`` to ``contracts.txt``.

    The list file lives in the current working directory and is only created
    when there is at least one entry.
    """
    for name in os.listdir(contractspath):
        with open('contracts.txt', mode='a', encoding='utf8') as listing:
            listing.write('%s\n' % name)
# 根据爬下来的网页获取创始者交易
def getCreateTx(contractpagespath):
    """Extract creator address and creation transaction from saved pages.

    One ``page#creator#txhash`` line per page is appended to
    ``createTxs.txt`` in the current working directory. Pages without
    creator information are skipped.

    Args:
        contractpagespath: directory with the saved HTML pages.
    """
    pattern = re.compile(r"title='Creator Address'>(.*?)</a>.*?<a href='/tx/(.*?)' title='Creator Txn Hash'")
    with open('createTxs.txt', 'a', encoding='utf8') as af:
        for contractpage in os.listdir(contractpagespath):
            print(contractpage)
            with open(os.path.join(contractpagespath, contractpage), 'r', encoding='utf8') as rf:
                res = pattern.findall(rf.read())
            if not res:
                # Previously an IndexError that also leaked the output handle.
                continue
            creator, creatTx = res[0]
            af.write('%s#%s#%s\n' % (contractpage, creator, creatTx))
# 根据爬下来的creator 和createTxs 界面输出这些个合约的创建时间以及创建区块
def putContractsCreateTime(creatorspath, createTxspath):
    """Record for every contract what kind of account created it.

    Files in ``creatorspath`` are named ``contract#creatorAddress``. A page
    containing 'The Address' is recorded as 'The Address', one containing
    'Contract Address' as 'Contract Address', one containing neither as
    'Unknown'. One ``contract#creator#type`` line per page is appended to
    ``creatorinfo.txt`` in the current working directory. Processing stops
    at the first page that contains both markers.

    Args:
        creatorspath: directory with the saved creator pages.
        createTxspath: currently unused; the step that read the creation
            transaction pages was disabled.
    """
    for creator in os.listdir(creatorspath):
        print(creator)
        contract, creatorAddr = creator.split('#')
        with open(os.path.join(creatorspath, creator), 'r', encoding='utf8') as rf:
            html = rf.read()
        isAddress = 'The Address' in html
        isContract = 'Contract Address' in html
        if isAddress and isContract:
            # Ambiguous page: flag it and stop.
            print('/////////////////////////////%s' % creator)
            break
        # Decide per page, so a page with neither marker can no longer reuse
        # the previous page's value (or hit an unbound name).
        if isContract:
            addrtype = 'Contract Address'
        elif isAddress:
            addrtype = 'The Address'
        else:
            addrtype = 'Unknown'
        with open('creatorinfo.txt', 'a', encoding='utf8') as af:
            af.write('%s#%s#%s\n' % (contract, creatorAddr, addrtype))
# 初始化一些信息
def init():
    """Create the working directories and empty the ``logs`` directory.

    ``lists``, ``logs`` and ``inlinebytecode`` are created in the current
    working directory when missing; every file in ``logs`` is then deleted.
    """
    for folder in ('lists', 'logs', 'inlinebytecode'):
        if not os.path.exists(folder):
            os.mkdir(folder)
    # Only the logs are wiped; lists and inlinebytecode are kept.
    for stale in os.listdir('logs/'):
        os.remove('logs/%s' % stale)
# 输出所有代码的行数以计算比例
def getTotalCodeLine(contractspath):
    """Count the lines of every file in ``contractspath``.

    Prints a self-overwriting progress line per file and the total at the
    end.

    Args:
        contractspath: directory with the contract sources.

    Returns:
        The total number of lines.
    """
    totallines, contracts = 0, os.listdir(contractspath)
    total = len(contracts)
    # enumerate() drives the progress counter, which was never advanced before.
    for counter, contract in enumerate(contracts, 1):
        with open(os.path.join(contractspath, contract), 'r', encoding='utf8') as rf:
            totallines += sum(1 for _ in rf)
        print('\r%s: %d/%d' % (contract, counter, total), end='')
    print('total line: %d' % totallines)
    return totallines
def diff(dir1, dir2, diffspath='../diffs/', contractspath='../data/contracts_inline/', diffcontractspath='../diff_contracts/'):
    """Collect the files present in ``dir2`` but missing from ``dir1``.

    ``dir1`` is expected to be a subset of ``dir2``. Every file found only
    in ``dir2`` is copied to ``diffspath``, and the contract source of the
    same name is copied from ``contractspath`` to ``diffcontractspath``.
    Destination directories are created if missing.

    Args:
        dir1: the smaller directory.
        dir2: the larger directory.
        diffspath: destination for the files missing from ``dir1``.
        contractspath: directory holding the contract sources.
        diffcontractspath: destination for the matching contract sources.

    Returns:
        The list of file names missing from ``dir1``.
    """
    files1 = set(os.listdir(dir1))
    diffs = [file2 for file2 in os.listdir(dir2) if file2 not in files1]
    if diffs:
        os.makedirs(diffspath, exist_ok=True)
        os.makedirs(diffcontractspath, exist_ok=True)
    for file2 in diffs:
        shutil.copy(os.path.join(dir2, file2), os.path.join(diffspath, file2))
        source = os.path.join(contractspath, file2)
        if os.path.exists(source):
            shutil.copy(source, os.path.join(diffcontractspath, file2))
        else:
            print('[WARN] no contract source for %s' % file2)
    print(len(diffs), diffs)
    return diffs
def getSourceCode(contractspath):
    """Extract the Solidity source from saved API responses.

    Each file in ``contractspath`` is JSON; its ``result[0]['SourceCode']``
    field is written to ``contracts/<same name>`` in the current working
    directory (the directory is created if missing).

    Args:
        contractspath: directory with the saved JSON responses.
    """
    os.makedirs('contracts', exist_ok=True)
    for contract in os.listdir(contractspath):
        print(contract)
        # Context manager: the handle is released even if parsing fails.
        with open(os.path.join(contractspath, contract), 'r', encoding='utf8') as rf:
            res = json.loads(rf.read())
        with open('contracts/%s' % contract, 'w', encoding='utf8') as wf:
            wf.write(res['result'][0]['SourceCode'])
# Script entry point: one-off maintenance steps, enabled by uncommenting the
# call to run. Only the diff step is currently active.
if __name__ == '__main__':
    # getAllInfo(16842)
    # testRequire()
    # formatVersion()
    # putContractsList('inlinecontracts/')
    # getCreateTx('contract_pages/')
    # putContractsCreateTime('creator_pages/', 'createTx_pages/')
    # getCallTimes('contract_pages/')
    # getTotalCodeLine('inlinecontracts/')
    diff('../data/inlinebytecode', '../data/compileresults_inline')
    # getSourceCode('getcode/contract_pages/')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment