import pandas as pd
from dataclasses import dataclass, field, asdict
from typing import List, Tuple
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import numpy as np
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import json
import billboard
from collections import defaultdict, Counter
from models import *
# spotipy wraps the official Spotify Web API, providing simple Python functions.
# TODO: Replace these two variables with the client_id and client_secret that you generated.
# See https://developer.spotify.com/dashboard/applications to get a client_id and client_secret.
# NOTE(security): real credentials should not be committed to source control.
# Rotate these values and prefer loading them from the environment or an
# untracked config file.
CLIENT_ID = "3ef1d84df6444d63bbcc100de7ea59b3"
CLIENT_SECRET = "534a5b6c545d49a3a3fe0ce33e13ee84"

# Shared, module-level Spotify client used by every function in this file.
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=CLIENT_ID,
        client_secret=CLIENT_SECRET,
    )
)
def getPlaylist(id: str) -> List[Track]:
    """Return the songs of a Spotify playlist as a list of Track objects.

    See models.py for the definition of the dataclasses Track, Artist, and
    AudioFeatures. The audio features of each track are fetched to populate
    ``Track.audio_features``, and the genre(s) of each artist are fetched to
    populate the Artist objects in ``Track.artists``.

    Args:
        id: The playlist ID handed to ``sp.playlist``. (The name shadows the
            builtin ``id``; it is kept so keyword callers keep working.)

    Returns:
        One Track per usable playlist item, in playlist order. Items that
        carry no track payload or no track id, and tracks for which no audio
        features were returned, are left out because a Track cannot be
        populated for them.
    """
    # fetch tracks data from spotify given a playlist id
    playlistdata = sp.playlist(id)
    # Keep only items that can actually be looked up further down.
    tracks = [
        item for item in playlistdata["tracks"]["items"]
        if item.get("track") and item["track"].get("id")
    ]

    # fetch audio features based on the data stored in the playlist result
    track_ids = [item["track"]["id"] for item in tracks]
    # Keyed by track id: the audio features list might not be in the same
    # order as the track list.
    audio_info = {}
    # The audio-features endpoint accepts at most 100 ids per request.
    for start in range(0, len(track_ids), 100):
        batch = sp.audio_features(track_ids[start:start + 100]) or []
        for af in batch:
            if af is None:  # no audio features available for this track
                continue
            audio_info[af["id"]] = AudioFeatures(af["danceability"],
                                                 af["energy"],
                                                 af["key"],
                                                 af["loudness"],
                                                 af["mode"],
                                                 af["speechiness"],
                                                 af["acousticness"],
                                                 af["instrumentalness"],
                                                 af["liveness"],
                                                 af["valence"],
                                                 af["tempo"],
                                                 af["duration_ms"],
                                                 af["time_signature"],
                                                 af["id"])

    # prepare artist dictionary
    # NOTE: artists are read from each track's *album* entry, here and when
    # the Track objects are built below, so the two lookups always agree.
    # dict.fromkeys de-duplicates while preserving first-seen order.
    artist_ids = list(dict.fromkeys(
        artist["id"]
        for item in tracks
        for artist in item["track"]["album"]["artists"]
    ))

    artists = {}
    # Can only request info on 50 artists at a time. Stepping by 50 never
    # produces an empty batch, unlike iterating 1 + len // 50 times.
    for start in range(0, len(artist_ids), 50):
        artists_response = sp.artists(artist_ids[start:start + 50])
        for a in artists_response["artists"]:
            artists[a["id"]] = Artist(a["id"],
                                      a["name"],
                                      a["genres"])

    # populate track dataclass
    trackList = [
        Track(id=item["track"]["id"],
              name=item["track"]["name"],
              artists=[artists[a["id"]]
                       for a in item["track"]["album"]["artists"]],
              audio_features=audio_info[item["track"]["id"]])
        for item in tracks
        if item["track"]["id"] in audio_info
    ]
    if trackList:
        print(trackList[0])  # print to inspect
    print(len(artist_ids))
    return trackList
# This function is just a way of naming the list we're using. You can write
# additional functions like "top Canadian hits!" if you want.
def getHot100() -> List[Track]:
    """Return the tracks of the Billboard Hot 100 playlist.

    A thin, named wrapper around ``getPlaylist`` with a fixed playlist ID.
    """
    # Billboard hot 100 Playlist ID URI
    hot_100_id = "6UeSakyzhiEt4NB3UAd6NQ"
    return getPlaylist(hot_100_id)
#
# part1: implement helper functions to organize data into DataFrames
def getGenres(t: Track) -> List[str]:
    """Return the unique genres that the artists of a track belong to.

    Args:
        t: A track whose ``artists`` each expose a ``genres`` list.

    Returns:
        Every genre found on any of the track's artists, without duplicates,
        in first-seen order. Empty when the track has no artists or none of
        them has a genre.
    """
    # dict.fromkeys de-duplicates in one pass while keeping insertion order.
    return list(dict.fromkeys(
        genre
        for artist in t.artists
        for genre in artist.genres
    ))
def doesGenreContains(t: Track, genre: str) -> bool:
    """Check whether any genre of a track contains the given key string.

    The match is a substring test against each genre of each artist on the
    track, not an exact comparison. For example, if a track's unique genres
    are ["pop", "country pop", "dance pop"]:

        doesGenreContains(t, "dance") == True
        doesGenreContains(t, "pop") == True
        doesGenreContains(t, "hip hop") == False

    Args:
        t: A track whose ``artists`` each expose a ``genres`` list.
        genre: The key string to look for inside the genre names.

    Returns:
        True if ``genre`` occurs inside at least one genre name, else False.
    """
    # An exact-match count would miss "dance" in "dance pop". any() also
    # stops at the first hit, so de-duplicating the genres first is not needed.
    return any(
        genre in artist_genre
        for artist in t.artists
        for artist_genre in artist.genres
    )
def getTrackDataFrame(tracks: List[Track]) -> pd.DataFrame:
    """Prepare a DataFrame for a list of tracks.

    This function is given.

    Each row holds the fields of the track's audio-features dataclass
    followed by the track and artist columns:

        audio-features: danceability, energy, key, loudness, mode,
                        speechiness, acousticness, instrumentalness,
                        liveness, valence, tempo, duration_ms,
                        time_signature, id
        track & artist: track_name, artist_ids, artist_names, genres,
                        is_pop, is_rap, is_dance, is_country

    Args:
        tracks: The tracks to tabulate.

    Returns:
        A DataFrame with one row per track; empty when ``tracks`` is empty.
    """
    # populate records
    records = []
    for t in tracks:
        # asdict converts the audio_features dataclass object to a dict.
        to_add = asdict(t.audio_features)
        to_add["track_name"] = t.name
        to_add["artist_ids"] = [a.id for a in t.artists]
        to_add["artist_names"] = [a.name for a in t.artists]
        to_add["genres"] = getGenres(t)
        to_add["is_pop"] = doesGenreContains(t, "pop")
        to_add["is_rap"] = doesGenreContains(t, "rap")
        to_add["is_dance"] = doesGenreContains(t, "dance")
        to_add["is_country"] = doesGenreContains(t, "country")
        records.append(to_add)

    # create dataframe from records
    return pd.DataFrame.from_records(records)
# Minor testing code. These statements run at module level, so loading this
# file calls getHot100(), which goes through getPlaylist() to the Spotify client.
top100Tracks = getHot100()
# Tabular view of the same tracks; see getTrackDataFrame for the columns.
df = getTrackDataFrame(top100Tracks)
# You may want to experiment with the dataframe now!
#
# Part2: The most popular artist of the week
def artist_with_most_tracks(tracks: List[Track]) -> Tuple[Artist, int]:
    """Find the artist that appears on the largest number of tracks.

    List of tracks -> (artist, number of tracks the artist has).
    If there is a tie, any one of the tied artists may be returned.

    Args:
        tracks: The tracks to tally; each exposes an ``artists`` list whose
            entries have an ``id``.

    Returns:
        A tuple of the winning artist object and its track count.

    Raises:
        ValueError: If ``tracks`` is empty or no track lists any artist.
    """
    # Tally by artist id rather than by the artist object itself, so the
    # count does not depend on the artist objects being hashable.
    tally = Counter()
    artist_by_id = {}
    for track in tracks:
        # Collapse repeats inside one track so a track counts once per artist.
        on_this_track = {artist.id: artist for artist in track.artists}
        tally.update(on_this_track.keys())
        artist_by_id.update(on_this_track)

    if not tally:
        raise ValueError("cannot pick the artist with most tracks: no artists found")

    # most_common(1) yields [(artist_id, count)]; unpack it into the
    # documented (Artist, int) pair.
    top_id, num_tracks = tally.most_common(1)[0]
    return artist_by_id[top_id], num_tracks
# Minor testing code: report the artist with the most tracks on the chart
# fetched into top100Tracks.
artist, num_track = artist_with_most_tracks(top100Tracks)
print("%s has the most number of tracks on this week's Hot 100 at a whopping %d tracks!" % (artist.name, num_track))
# Part3: Data Visualization
# 3.1 scatter plot of dancability-tempo colored by genre is_rap
# 3.2 scatter plot (ask your own question)
# Reviews
# There are no reviews yet.