forked from fbkarsdorp/spotify-chart
-
Notifications
You must be signed in to change notification settings - Fork 0
/
charts.py
80 lines (71 loc) · 2.52 KB
/
charts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import io
import pandas as pd
import requests
import time
import tqdm
def week_dates(date, weekday=0):
    """Return the pair of ``weekday``-aligned dates bracketing *date*.

    Args:
        date: A timestamp that supports ``pd.DateOffset`` arithmetic.
        weekday: Weekday passed straight through to ``pd.DateOffset``
            (dateutil numbering: 0 = Monday ... 6 = Sunday).

    Returns:
        A ``(week_start, week_end)`` tuple. ``week_start`` is *date* minus
        a one-week offset, ``week_end`` is *date* plus a zero-week offset;
        both offsets carry the same ``weekday``.
    """
    # An integer weekday makes the underlying relativedelta snap to that
    # weekday on or after the shifted date, so the two results are one
    # week apart and ``week_end`` is never earlier than ``date``.
    one_week_back = pd.DateOffset(weeks=1, weekday=weekday)
    same_week = pd.DateOffset(weeks=0, weekday=weekday)
    return date - one_week_back, date + same_week
def get_chart(date, region='en', freq='daily', chart='top200', timeout=30):
    """Download a single chart from spotifycharts.com as a DataFrame.

    Args:
        date: Anything ``pd.to_datetime`` accepts; must be in 2017 or later.
        region: Region segment of the download URL.
            NOTE(review): the command-line entry point defaults to
            'global' while this default is 'en' -- confirm 'en' is a
            region the server actually knows.
        freq: Frequency segment of the URL. 'weekly' requests the date
            range returned by ``week_dates(date, weekday=4)``; any other
            value requests the single day.
        chart: 'top200' maps to the 'regional' URL segment; any other
            value maps to 'viral'.
        timeout: Seconds ``requests`` waits for the server before raising
            instead of blocking forever.

    Returns:
        The parsed chart, or ``None`` when the server answers with an
        error status or the body cannot be parsed as CSV.

    Raises:
        ValueError: If *date* falls before 2017.
    """
    chart_path = 'regional' if chart == 'top200' else 'viral'
    when = pd.to_datetime(date)
    if when.year < 2017:
        raise ValueError('No chart data available from before 2017')

    if freq == 'weekly':
        start, end = week_dates(when, weekday=4)
        period = f'{start.date()}--{end.date()}'
    else:
        period = f'{when.date()}'

    url = (
        f'https://spotifycharts.com/{chart_path}/{region}/{freq}/{period}'
        '/download'
    )
    response = requests.get(url, timeout=timeout)
    if not response.ok:
        # An error page is not chart data; do not hand it to the CSV parser.
        print(f'Request for {url} failed with HTTP status '
              f'{response.status_code}')
        return None

    try:
        return pd.read_csv(io.StringIO(response.text))
    except (pd.errors.ParserError, pd.errors.EmptyDataError):
        # Show the start of the body itself (not the buffer object) so the
        # reason for the failure is visible.
        print(f'Could not parse the response from {url} as CSV:')
        print(response.text[:500])
        return None
def get_charts(start, end, region='en', freq='daily', chart='top200', sleep=1):
    """Download every chart between *start* and *end* into one DataFrame.

    Args:
        start: First date of the range, as accepted by ``pd.date_range``.
        end: Last date of the range, as accepted by ``pd.date_range``.
        region: Forwarded to ``get_chart``.
        freq: 'daily' steps through the range day by day; any other value
            steps week by week. Also forwarded to ``get_chart``.
        chart: Forwarded to ``get_chart``.
        sleep: Seconds to pause after each download.

    Returns:
        All retrieved charts concatenated, each with an added ``date``
        column holding the date it was requested for. An empty DataFrame
        is returned when no chart could be retrieved.
    """
    step = 'D' if freq == 'daily' else 'W'
    frames = []
    for date in tqdm.tqdm(pd.date_range(start=start, end=end, freq=step)):
        df = get_chart(date, region=region, freq=freq, chart=chart)
        if df is not None:
            df['date'] = date
            frames.append(df)
        time.sleep(sleep)
    if not frames:
        # pd.concat raises ValueError on an empty list; hand back an empty
        # frame so callers can still inspect or save the result.
        return pd.DataFrame()
    return pd.concat(frames)
def main(argv=None):
    """Command-line entry point: fetch one chart or a range and save as CSV.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Raises:
        SystemExit: If no chart data was retrieved, so that no output
            file is written from a missing or empty result.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--start_date', required=True,
        help='A date defining the start day for the chart.')
    parser.add_argument(
        '--outfile', required=True,
        help='Save the results in this file.')
    parser.add_argument(
        '--end_date',
        help='A date defining the end day for the chart.')
    parser.add_argument(
        '--region',
        default='global',
        help='A region defined for the chart.')
    parser.add_argument(
        '--freq',
        choices=['daily', 'weekly'],
        default='daily',
        help='Use timestamps on a weekly or daily frequency.')
    parser.add_argument(
        '--chart',
        choices=['top200', 'viral'],
        default='top200',
        help='The type of chart to retrieve.')
    args = parser.parse_args(argv)

    # With an end date a whole range is fetched; otherwise a single chart.
    if args.end_date is not None:
        df = get_charts(args.start_date, args.end_date, region=args.region,
                        freq=args.freq, chart=args.chart)
    else:
        df = get_chart(args.start_date, region=args.region, freq=args.freq,
                       chart=args.chart)

    # get_chart returns None when a download cannot be parsed; calling
    # to_csv on that would fail with an unhelpful AttributeError.
    if df is None or df.empty:
        raise SystemExit(
            f'No chart data could be retrieved; {args.outfile} was not '
            'written.')
    df.to_csv(args.outfile)


if __name__ == '__main__':
    main()