-
Notifications
You must be signed in to change notification settings - Fork 2
/
fetchPackages.py
75 lines (69 loc) · 1.89 KB
/
fetchPackages.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import sys
import os
import requests
import json
from bs4 import BeautifulSoup
from ecosystemDataManager.ecosystemDataManager import EcosystemDataManager
def getContent(url):
    """Download *url* with an HTTP GET and return the response body as text.

    Args:
        url: Address to request.

    Returns:
        The ``text`` attribute of the response.

    Raises:
        Exception: If the response status code is anything other than 200.
            The message names the URL and the status code that was received.
    """
    response = requests.get(url)
    if response.status_code != 200:
        # Same exception type as before (callers may catch plain Exception),
        # but with enough context to tell which request failed and how.
        raise Exception(
            "GET {} returned HTTP status {}".format(url, response.status_code)
        )
    return response.text
def fetchNpm():
    """Return the ids listed by the npm registry's ``_all_docs`` endpoint.

    The response is parsed as JSON; every entry under its ``"rows"`` key
    contributes its ``"id"`` value, which the caller uses as a package name.

    Returns:
        list: The ``"id"`` of each row, in the order the registry returned them.

    Raises:
        Exception: Propagated from ``getContent`` when the request fails.
    """
    registry = 'https://skimdb.npmjs.com/registry/'
    # URLs are joined by plain concatenation: os.path.join builds filesystem
    # paths with the platform's separator rules and is not meant for URLs.
    listing = json.loads(getContent(registry + '_all_docs'))
    return [row["id"] for row in listing["rows"]]
def fetchRubygems():
    """Return the package names found in rubygems.org's ``versions`` listing.

    The response is split on newlines. The first two lines are skipped
    (presumably a header -- confirm against the registry's format), and the
    text before the first space of every remaining non-empty line is taken as
    a package name. No de-duplication is performed.

    Returns:
        list: One name per non-empty entry line, in response order. Empty when
        the response has no entry lines.

    Raises:
        Exception: Propagated from ``getContent`` when the request fails.
    """
    registry = 'https://rubygems.org/'
    # URLs are joined by plain concatenation: os.path.join builds filesystem
    # paths with the platform's separator rules and is not meant for URLs.
    lines = getContent(registry + 'versions').split("\n")
    # Blank lines are filtered out rather than blindly deleting the last
    # element, so the final entry survives when the response does not end
    # with a newline, and a short response yields [] instead of IndexError.
    return [line.split(" ")[0] for line in lines[2:] if line]
def fetchCran():
    """Return the package names linked from CRAN's "available packages by name" page.

    The page is parsed with BeautifulSoup (``lxml`` parser). Every anchor whose
    ``href`` contains ``../../web/packages`` contributes its text.

    Returns:
        list: The text of each matching anchor, in document order.

    Raises:
        Exception: Propagated from ``getContent`` when the request fails.
    """
    registry = "https://cran.r-project.org/web/packages/available_packages_by_name.html"
    soup = BeautifulSoup(getContent(registry), "lxml")
    # Only anchors that carry an href are considered: Tag.get('href') is None
    # for the others, and `in` cannot be applied to None (TypeError).
    return [
        anchor.getText()
        for anchor in soup.find_all('a', href=True)
        if '../../web/packages' in anchor['href']
    ]
def fetch(ecosystem):
    """Fetch the package names of *ecosystem* from its registry.

    Args:
        ecosystem: ``"npm"``, ``"rubygems"`` or ``"cran"`` (exact match).

    Returns:
        The list produced by the matching fetcher, or ``None`` when
        *ecosystem* is not one of the names above.
    """
    # Guard clauses: each known ecosystem returns straight away.
    if ecosystem == "cran":
        return fetchCran()
    if ecosystem == "rubygems":
        return fetchRubygems()
    if ecosystem == "npm":
        return fetchNpm()
    # No fetcher is registered for this name.
    return None
if __name__ == '__main__':
    # Command line: <ecosystem> [<limit> [<home>]]
    if len(sys.argv) < 2:
        print("Usage:", sys.argv[0], "<ecosystem> [<limit> [<home>]]")
        sys.exit(1)
    ecosystem = sys.argv[1]
    try:
        # -1 when omitted: the 1-based counter below never equals it, so
        # every fetched package is added.
        limit = int(sys.argv[2]) if len(sys.argv) > 2 else -1
    except ValueError:
        # A non-integer limit is a usage error, not a reason for a traceback.
        print("Usage:", sys.argv[0], "<ecosystem> [<limit> [<home>]]")
        sys.exit(1)
    home = sys.argv[3] if len(sys.argv) > 3 else ""
    ecosystemDataManager = EcosystemDataManager(ecosystem, home)
    packages = fetch(ecosystem)
    if packages is None:
        # fetch() returns None for an ecosystem it has no fetcher for;
        # iterating over it would fail with an opaque TypeError.
        print("Unsupported ecosystem:", ecosystem, file=sys.stderr)
        sys.exit(1)
    for index, package in enumerate(packages, start=1):
        ecosystemDataManager.addPackage(package)
        # Stop once exactly `limit` packages have been added.
        if index == limit:
            break
    ecosystemDataManager.save()