#!/usr/bin/env python
# Import all the things!
import sys
import os
import re
from urllib import urlopen, urlencode  # standard library in Python 2; no pip install needed
try:
    import argparse  # standard library since Python 2.7
except ImportError:
    print '[!] argparse is not installed. Try "pip install argparse"'
    sys.exit(1)
try:
    from bs4 import BeautifulSoup
except ImportError:
    print '[!] BeautifulSoup is not installed. Try "pip install beautifulsoup4"'
    sys.exit(1)
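# Note: this script is written for Python 2 (print statements, urllib.urlopen)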
# Display Startup Banner
def banner():
    print ""
    print "  _____ _ _     _    _                           _"
    print " / ____(_) |   | |  | |                         | |"
    print "| |  __ _| |_  | |__| | __ _ _ ____   _____  ___| |_ ___ _ __ "
    print "| | |_ | | __| |  __  |/ _` | '__\ \ / / _ \/ __| __/ _ \ '__|"
    print "| |__| | | |_  | |  | | (_| | |   \ V /  __/\__ \ ||  __/ | "
    print " \_____|_|\__| |_|  |_|\__,_|_|    \_/ \___||___/\__\___|_| "
    print ""
    print "Version 0.8"
    print "By: @metacortex of @dc801"
    print ""
# Parse GitHub search results
def githubsearch(search, regex, order, sort, account, project):
    navbarlinks = []
    if project:
        githubbase = 'https://github.com/' + account + '/' + project + '/search?'
    else:
        githubbase = 'https://github.com/search?'
    if account:
        search = 'user:' + account + ' ' + search
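    # Build the search query string; the keys mirror github.com/search
    # URL parameters: o = order, q = query, s = sort field, type = result type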
    githubsearchurl = {'o' : order, 'q' : search, 's' : sort, 'type' : 'Code', 'ref' : 'searchresults'}
    searchurl = githubbase + urlencode(githubsearchurl)
    if order == 'asc':
        print '[+] Searching GitHub for ' + search + ' and ordering by OLDEST'
    elif order == 'desc':
        print '[+] Searching GitHub for ' + search + ' and ordering by NEWEST'
    else:
        print '[+] Searching GitHub for ' + search + ' and ordering by BEST MATCH'
    print searchurl
    searchresults = urlopen(searchurl).read()
    soup = BeautifulSoup(searchresults, 'html.parser')
    # Find the bottom nav bar and parse out those links
    pagenav = soup.findAll('div', attrs={'class':'pagination'})
    if pagenav:
        for page in pagenav:
            pages = page.findAll('a')
            for a in pages:
                navbarlinks.append(a)
        try:
            # The last pagination link is 'Next'; the one before it holds the highest page number
            totalpages = int(navbarlinks[-2].get_text())
        except (IndexError, ValueError):
            print ' [!] Search error'
            sys.exit(1)
        print ' [+] Returned ' + str(totalpages) + ' total pages'
        # Parse each page of results
        for currentpage in range(1, totalpages + 1):
            parseresultpage(currentpage, search, order, sort, regex, account, project)
    else:
        print ' [+] Only one page of results'
        parseresultpage(1, search, order, sort, regex, account, project)
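# Fetch one page of search results, follow each result to its file page,
# and hand the raw-file URL to the matching search routine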
def parseresultpage(page, search, order, sort, regex, account, project):
    print ' [+] Pulling results from page ' + str(page)
    if project:
        githubbase = 'https://github.com/' + account + '/' + project + '/search?'
    else:
        githubbase = 'https://github.com/search?'
    githubsearchurl = {'o' : order, 'p' : page, 'q' : search, 's' : sort, 'type' : 'Code', 'ref' : 'searchresults'}
    searchurl = githubbase + urlencode(githubsearchurl)
    pagehtml = urlopen(searchurl).read()
    soup = BeautifulSoup(pagehtml, 'html.parser')
    # Find GitHub div with code results
    results = soup.findAll('div', attrs={'class':'code-list-item'})
    # Pull URLs from results and hit each of them
    soup1 = BeautifulSoup(str(results), 'html.parser')
    for item in soup1.findAll('p', attrs={'class':'title'}):
        soup2 = BeautifulSoup(str(item), 'html.parser')
        try:
            individualresult = soup2.findAll('a')[1]
        except IndexError:
            individualresult = soup2.findAll('a')[0]
        individualresulturl = 'https://github.com' + str(individualresult['href'])
        individualresultpage = urlopen(individualresulturl).read()
        soup3 = BeautifulSoup(str(individualresultpage), 'html.parser')
        for rawlink in soup3.findAll('a', attrs={'id':'raw-url'}):
            rawurl = 'https://github.com' + str(rawlink['href'])
            if args.custom_regex:
                searchcode(rawurl, regex)
            else:
                wpsearchcode(rawurl, regex)
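# Custom-regex mode: download the raw file and report the first regex match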
def searchcode(url, regex):
    code = urlopen(url).read()
    regexresults = re.search(regex, str(code))
    if regexresults is None:
        return
    result = str(regexresults.group(0))
    if args.url:
        print " " + str(url)
    if args.verbose:
        print " [+] Found the following results"
        print " " + str(result)
    if args.write_file and result:
        f = open(args.write_file, 'a')
        f.write(result + '\n')
        f.close()
    if args.directory:
        if not os.path.exists(args.directory):
            os.makedirs(args.directory)
        filename = args.directory + "/" + url.replace('/', '-')
        print " [+] Downloading " + filename
        # The file was already fetched above, so write that copy out
        # instead of downloading it a second time
        fp = open(filename, 'wb')
        fp.write(code)
        fp.close()
# Default mode: pull database credentials out of a wp-config.php file.
# Credential lines look like: define('DB_NAME', 'wordpress');
def wpsearchcode(url, regex):
    code = urlopen(url).read()
    regexdb = re.search(r"define\(\s*'DB_NAME'\s*,\s*'([^']*)'", str(code), re.IGNORECASE)
    regexuser = re.search(r"define\(\s*'DB_USER'\s*,\s*'([^']*)'", str(code), re.IGNORECASE)
    regexpass = re.search(r"define\(\s*'DB_PASSWORD'\s*,\s*'([^']*)'", str(code), re.IGNORECASE)
    regexhost = re.search(r"define\(\s*'DB_HOST'\s*,\s*'([^']*)'", str(code), re.IGNORECASE)
    # Skip files where the credentials do not appear
    if not (regexdb and regexuser and regexpass and regexhost):
        return
    db = regexdb.group(1)
    user = regexuser.group(1)
    password = regexpass.group(1)
    host = regexhost.group(1)
    if args.verbose:
        print ' [+] Found the following credentials'
    if args.url:
        print ' ' + str(url)
    print ' database: ' + db
    print ' user: ' + user
    print ' password: ' + password
    print ' host: ' + host
    if args.write_file:
        f = open(args.write_file, 'a')
        results = 'Database: ' + db + '\nUser: ' + user + '\nPassword: ' + password + '\nHost: ' + host + '\n---\n'
        f.write(results)
        f.close()
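# Entry point: parse command-line options and kick off the search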
def main():
    banner()
    # Parsing arguments
    parser = argparse.ArgumentParser(description='This tool is used for harvesting information from GitHub. By default it looks for code with the filename of \'wp-config.php\' and pulls out auth info')
    parser.add_argument('-a', action='store', dest='account', help='Specify a specific user account', type=str)
    parser.add_argument('-d', action='store', dest='directory', help='Download results to a specific directory', type=str)
    parser.add_argument('-o', action='store', dest='organize', help='Organize results by \'new\', \'old\', \'best\', or \'all\'', type=str)
    parser.add_argument('-p', action='store', dest='project', help='Specific project to search. Use with -a', type=str)
    parser.add_argument('-r', action='store', dest='custom_regex', help='Custom regex string', type=str)
    parser.add_argument('-s', action='store', dest='custom_search', help='Custom GitHub search string', type=str)
    parser.add_argument('-u', '--url', action='store_true', help='Output URL of found object')
    parser.add_argument('-v', '--verbose', action='store_true', help='Turn verbose output on. This will output matched lines')
    parser.add_argument('-w', action='store', dest='write_file', help='Write results to a file', type=str)
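    # Example invocation (hypothetical values):
    #   python githarvester.py -s 'filename:wp-config.php' -o new -u -w results.txt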
    global args
    args = parser.parse_args()
    # With no arguments at all, default to verbose output
    if not len(sys.argv) > 1:
        args.verbose = True
    if args.project:
        if not args.account:
            print '[!] -p requires -a'
            parser.print_help()
            sys.exit(1)
    if args.account:
        account = args.account
        print '[+] Searching the account ' + account
        if args.project:
            project = args.project
            print '[+] Searching the ' + project + ' project'
        else:
            project = None
    else:
        account = None
        project = None
    if args.custom_search:
        search = args.custom_search
        print '[+] Custom search is: ' + str(search)
    else:
        search = 'filename:wp-config.php'
        print '[+] Using default search'
    if args.custom_regex:
        regex = args.custom_regex
        print '[+] Custom regex is: ' + str(regex)
    else:
        regex = 'regexhere'  # placeholder; the default path uses wpsearchcode(), which has its own patterns
        print '[+] Using default regex'
    if args.organize == 'new':
        githubsearch(search, regex, 'desc', 'indexed', account, project)
    elif args.organize == 'old':
        githubsearch(search, regex, 'asc', 'indexed', account, project)
    elif args.organize == 'best':
        githubsearch(search, regex, '', '', account, project)
    elif args.organize == 'all':
        githubsearch(search, regex, '', '', account, project)
        githubsearch(search, regex, 'desc', 'indexed', account, project)
        githubsearch(search, regex, 'asc', 'indexed', account, project)
    else:
        githubsearch(search, regex, '', '', account, project)
    print '[+] DONE'
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print "[!] Keyboard Interrupt. Shutting down"