Introduction
Today we will consider such a well-known music service as Yandex.Music. Good overall service, but with a significant drawback - the inability to work offline. We will try to correct this annoying misunderstanding using available tools.
Tools
So, we need:
- Relatively fresh python : 3.7 and up
- Any async: aiohttp and aiofile
- Classic tool for working with html-API: BeautifulSoup
- To entertain the user during the process: tqdm
- To fill in tags: mutagen
Authorization
Unauthorized users of the service can only access segments of songs up to 30 seconds long. This is clearly not enough for quality listening. We will log in in the most natural way, through a web form and receive cookies. This will help us opener for making requests and HTMLParser for parsing forms.
def resolve_cookie(login: str, password: str) -> str:
cookies = CookieJar()
opener = urllib.request.build_opener(
urllib.request.HTTPCookieProcessor(cookies),
urllib.request.HTTPRedirectHandler())
response = opener.open("https://passport.yandex.ru")
doc = response.read()
parser = FormParser()
parser.feed(doc.decode("utf-8"))
parser.close()
parser.params["login"] = login
response = opener.open(parser.url or response.url, urllib.parse.urlencode(parser.params).encode("utf-8"))
doc = response.read()
parser = FormParser()
parser.feed(doc.decode("utf-8"))
parser.close()
parser.params["login"] = login
parser.params["passwd"] = password
response = opener.open(parser.url or response.url, urllib.parse.urlencode(parser.params).encode("utf-8"))
cookie_data = {}
for item in cookies:
if item.domain == ".yandex.ru":
cookie_data[item.name] = item.value
if "yandex_login" not in cookie_data:
keys = ", ".join(cookie_data.keys())
raise Exception(f"Invalid cookie_data {keys}")
return "; ".join(map(lambda v: f"{v[0]}={v[1]}", cookie_data.items()))
https://passport.yandex.ru
login
. , , . . β . yandex_login
, . .
Yandex Music (HTML) API
. , aiohttp. html BeautifulSoup. , , -.
class YandexMusicApi:
host = "music.yandex.ru"
base_url = f"https://{host}"
def __init__(self, cookie: str):
self.headers = Headers(self.host, cookie)
async def _request(self, end_point: str):
async with aiohttp.ClientSession() as session:
url = f"{self.base_url}/{end_point}"
async with session.request(method="GET", url=url) as response:
return await response.read()
async def get_favorite_artists(self, login: str) -> List[Artist]:
body = await self._request(f"users/{login}/artists")
soup = BeautifulSoup(body, "lxml")
artists_soup = soup.find("div", class_="page-users__artists")
if artists_soup is None:
caption = soup.find("div", class_="page-users__caption")
if caption:
raise Exception(caption.contents[0])
result = []
for artist_soup in artists_soup.find_all("div", class_="artist"):
title_soup = artist_soup.find("div", class_="artist__name")
title = title_soup.attrs["title"]
title_href_soup = title_soup.find("a")
id_ = int(title_href_soup.attrs["href"].split("/")[-1])
result.append(Artist(id_, title))
return result
, https://music.yandex.ru/users/<login>/artists
page-users__artists
. title
artist__name
. Id split
.
, .
. , . β yandex-. , . Network
, https://{host}/get-mp3/{sign}/{ts}/{path}
, sign
. (XGRlBW9FXlekgbPrRHuSiA
) . , .
async def get_track_url(self, album_id: int, track_id: int) -> str:
async with aiohttp.ClientSession() as session:
url = f"{self.base_url}/api/v2.1/handlers/track/{track_id}:{album_id}/" \
f"web-album-track-track-main/download/m?hq=0&external-domain={self.host}&overembed=no&__t={timestamp()}"
page = f"album/{album_id}"
headers = self.headers.build(page)
async with session.request(method="GET", url=url, headers=headers) as response:
body = await response.json()
src = body["src"]
src += f"&format=json&external-domain={self.host}&overembed=no&__t={timestamp()}"
result = parse.urlparse(src)
headers = self.headers.build(page, {
":authority": "storage.mds.yandex.net",
":method": "GET",
":path": f"{result.path}/{result.query}",
":scheme": "https",
}, True)
async with session.request(method="GET", url=src, headers=headers) as response:
body = await response.json()
host = body["host"]
path = body["path"]
s = body["s"]
ts = body["ts"]
sign = md5(f"XGRlBW9FXlekgbPrRHuSiA{path[1::]}{s}".encode("utf-8")).hexdigest()
url = f"https://{host}/get-mp3/{sign}/{ts}/{path}"
return url
, .
async def download_file(cls, url: str, filename: str):
async with aiohttp.ClientSession() as session:
async with session.request(method="GET", url=url) as response:
data = await response.read()
async with AIOFile(filename, "wb") as afp:
await afp.write(data)
await afp.fsync()
. , , mp3, , , . , , . , . .
, . , , . , , . (- mp3 ).
async def download_artist(self, artist: Artist, depth: Depth = Depth.NORMAL):
artist_progress = tqdm(total=0, desc=artist.title, position=1, ascii=True)
albums = await self.api.get_artist_albums(artist.id)
artist_progress.total = len(albums)
artist_progress.refresh()
for album in albums:
album_dir = os.path.join(self.target_dir, normalize(artist.title), f"{album.year} - {normalize(album.title)}")
if depth < Depth.ALBUMS and os.path.exists(album_dir):
artist_progress.update()
continue
album_progress = tqdm(total=0, desc=f"> {album.title}", position=0, ascii=True)
tracks = await self.api.get_album_tracks(album.id)
album_progress.total = len(tracks)
album_progress.refresh()
os.makedirs(album_dir, exist_ok=True)
if album.cover:
album_progress.total += 1
cover_filename = os.path.join(album_dir, "cover.jpg")
if not os.path.exists(cover_filename):
await self.download_file(album.cover, cover_filename)
album_progress.update()
for track in tracks:
target_filename = os.path.join(album_dir, f"{track.num:02d}. {normalize(track.title)}.mp3")
if depth >= Depth.TRACKS or not os.path.exists(target_filename):
url = await self.api.get_track_url(track.album_id, track.id)
await self.download_file(url, target_filename)
self.write_tags(target_filename, {
"title": track.title,
"tracknumber": str(track.num),
"artist": artist.title,
"album": album.title,
"date": str(album.year),
})
album_progress.update()
album_progress.close()
artist_progress.update()
artist_progress.close()
, , AC/DC
. normalize
:
def normalize(name: str) -> str:
return name.replace("/", "-")
, ( ) . . asyncio.Semaphore asyncio.gather.
, .
, , , . .credentials
, . .cookie
.
def resolve_cookie() -> str:
base_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), os.path.pardir))
cookie_file = os.path.join(base_dir, ".cookie")
if os.path.exists(cookie_file):
with open(cookie_file, "rt") as file:
return file.read()
credentials_file = os.path.join(base_dir, ".credentials")
if os.path.exists(credentials_file):
config = configparser.ConfigParser()
config.read(credentials_file)
login = config["yandex"]["login"]
password = config["yandex"]["password"]
else:
raise Exception(f"""Create \"{credentials_file}\" with content
[yandex]
login=<user_login>
password=<user_password>
""")
cookie = auth.resolve_cookie(login, password)
with open(cookie_file, "wt") as file:
file.write(cookie)
return cookie
, , . argparse, .
:
- -a (--artist), Id , ,
- -o (--output), , β
Music
. - -d (--depth), ,
- -
0 (NORMAL)
, , , - Value
1 (ALBUMS)
loops through all the tracks in the album and downloads the missing ones - The value
2 (TRACKS)
downloads and overwrites tracks even if they are already present in the file system
- -
async def main():
parser = argparse.ArgumentParser()
parser.add_argument("-a", "--artist", help="Artist ID")
parser.add_argument("-o", "--output", default=f"{Path.home()}/Music",
help=f"Output directory, default {Path.home()}/Music")
parser.add_argument("-d", "--depth", default=0, type=int,
help=f"Exists files check depth, {enum_print(Depth)}")
args = parser.parse_args()
cookie = resolve_cookie()
api = YandexMusicApi(cookie)
agent = YandexMusicAgent(api, args.output)
if args.artist:
artist = await api.get_artist(args.artist)
await agent.download_artist(artist, args.depth)
else:
email = re.compile(".*?yandex_login=(.*?);.*?", re.M).match(cookie).group(1)
await agent.download_favorites(email, args.depth)
And now, finally, we can run it all:
if __name__ == "__main__":
asyncio.run(main())
Thank you for attention. Now you know how to implement an API that does not exist, download the non-downloadable one and become the proud owner of your own music collection.
The result can be seen in the yandex.music.agent repository