Offline mode implementation for Yandex.Music

Introduction



Today we will look at the well-known music service Yandex.Music. It is a good service overall, but it has one significant drawback: it cannot work offline. We will try to correct this annoying shortcoming using readily available tools.



Tools



So, we need:





Authorization



Unauthenticated users of the service can only listen to song fragments up to 30 seconds long. This is clearly not enough for quality listening. We will log in the most natural way, through the web form, and obtain cookies. An opener (from urllib.request) will help us make the requests, and HTMLParser will help us parse the forms.



def resolve_cookie(login: str, password: str) -> str:
    """Log in through the Yandex Passport web forms and return the cookies.

    The start page is fetched and its form is submitted with the login; the
    form in the response is then submitted with the login and the password.
    All requests share one cookie jar.

    Args:
        login: Account login.
        password: Account password.

    Returns:
        The cookies of the ``.yandex.ru`` domain formatted as ``name=value``
        pairs joined by ``"; "``.

    Raises:
        Exception: If no ``yandex_login`` cookie was received.
    """
    jar = CookieJar()
    opener = urllib.request.build_opener(
        urllib.request.HTTPCookieProcessor(jar),
        urllib.request.HTTPRedirectHandler(),
    )

    def submit_form(page, fields):
        # Parse the form found on ``page``, set ``fields`` on top of the
        # parsed parameters and send the result as the request body.
        form = FormParser()
        form.feed(page.read().decode("utf-8"))
        form.close()
        for key, value in fields.items():
            form.params[key] = value
        payload = urllib.parse.urlencode(form.params).encode("utf-8")
        # Fall back to the page's own URL when the parser reports no URL.
        return opener.open(form.url or page.url, payload)

    start_page = opener.open("https://passport.yandex.ru")
    password_page = submit_form(start_page, {"login": login})
    submit_form(password_page, {"login": login, "passwd": password})

    cookie_data = {
        item.name: item.value for item in jar if item.domain == ".yandex.ru"
    }
    if "yandex_login" not in cookie_data:
        keys = ", ".join(cookie_data)
        raise Exception(f"Invalid cookie_data {keys}")
    return "; ".join(f"{name}={value}" for name, value in cookie_data.items())


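First we open https://passport.yandex.ru and submit the form with the login. The response contains the next form, which we submit with the login and the password. The cookies received along the way are collected in the cookie jar — we keep the ones for the .yandex.ru domain. If yandex_login is among them, authorization succeeded. Otherwise an exception is raised.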



Yandex Music (HTML) API



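The service has no public API, so we will work with its pages directly. Requests are made with aiohttp. The returned html is parsed with BeautifulSoup. In this way we build our own small API on top of the web pages.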



class YandexMusicApi:
    """Client that reads data from the music.yandex.ru web pages."""

    host = "music.yandex.ru"
    base_url = f"https://{host}"

    def __init__(self, cookie: str):
        """Create the header builder for ``host`` using the given cookie string."""
        self.headers = Headers(self.host, cookie)

    async def _request(self, end_point: str):
        """GET ``base_url/end_point`` and return the raw response body.

        NOTE(review): ``self.headers`` is not applied here, so these requests
        are sent without the cookie — confirm that this is intended.
        """
        async with aiohttp.ClientSession() as session:
            url = f"{self.base_url}/{end_point}"
            async with session.request(method="GET", url=url) as response:
                return await response.read()

    async def get_favorite_artists(self, login: str) -> List[Artist]:
        """Return the artists listed on the ``users/<login>/artists`` page.

        Args:
            login: Login of the user whose artists page is read.

        Returns:
            One ``Artist(id, title)`` per ``div.artist`` element found inside
            the ``page-users__artists`` block.

        Raises:
            Exception: If the page has no ``page-users__artists`` block. The
                message is the first child of the ``page-users__caption``
                element when that element exists and is not empty, otherwise
                a generic description.
        """
        body = await self._request(f"users/{login}/artists")
        soup = BeautifulSoup(body, "lxml")
        artists_soup = soup.find("div", class_="page-users__artists")
        if artists_soup is None:
            caption = soup.find("div", class_="page-users__caption")
            if caption is not None and caption.contents:
                raise Exception(caption.contents[0])
            # Without this, the loop below would fail with an opaque
            # AttributeError on None.
            raise Exception(f"Artists block not found on the page of user {login}")
        result = []
        for artist_soup in artists_soup.find_all("div", class_="artist"):
            title_soup = artist_soup.find("div", class_="artist__name")
            title = title_soup.attrs["title"]
            title_href_soup = title_soup.find("a")
            # The artist id is the last path segment of the link.
            id_ = int(title_href_soup.attrs["href"].split("/")[-1])
            result.append(Artist(id_, title))
        return result


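For example, to get the list of favorite artists we request the page https://music.yandex.ru/users/<login>/artists and look for the page-users__artists block in it. The artist's name is taken from the title attribute of the artist__name element. The Id is obtained from the link's address using split.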

, .





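Getting the link to the track file is the most interesting part. The address is not present in the page itself — it is assembled by the yandex player. In the browser's Network tab you can see that the file is requested from an address of the form https://{host}/get-mp3/{sign}/{ts}/{path}, where the host, ts and path come from a JSON response and sign has to be computed. It is an md5 hash of a constant salt (XGRlBW9FXlekgbPrRHuSiA), the path without its first character, and the s value from the same response.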



    async def get_track_url(self, album_id: int, track_id: int) -> str:
        """Resolve the direct ``get-mp3`` URL of a track.

        Two JSON requests are made: the first, to the track download handler,
        yields a ``src`` URL; the second, to that ``src`` URL, yields the
        ``host``, ``path``, ``s`` and ``ts`` values from which the final URL
        and its signature are built.

        Args:
            album_id: Id of the album the track belongs to.
            track_id: Id of the track.

        Returns:
            URL of the form ``https://{host}/get-mp3/{sign}/{ts}/{path}``.
        """
        async with aiohttp.ClientSession() as session:
            handler_url = (
                f"{self.base_url}/api/v2.1/handlers/track/{track_id}:{album_id}/"
                f"web-album-track-track-main/download/m?hq=0&external-domain={self.host}&overembed=no&__t={timestamp()}"
            )
            album_page = f"album/{album_id}"
            handler_headers = self.headers.build(album_page)
            async with session.request(method="GET", url=handler_url, headers=handler_headers) as handler_response:
                handler_info = await handler_response.json()

            storage_url = handler_info["src"]
            storage_url += f"&format=json&external-domain={self.host}&overembed=no&__t={timestamp()}"
            parts = parse.urlparse(storage_url)
            # NOTE(review): the meaning of the trailing ``True`` is defined by
            # Headers.build, which is not visible here.
            storage_headers = self.headers.build(album_page, {
                ":authority": "storage.mds.yandex.net",
                ":method": "GET",
                ":path": f"{parts.path}/{parts.query}",
                ":scheme": "https",
            }, True)
            async with session.request(method="GET", url=storage_url, headers=storage_headers) as storage_response:
                file_info = await storage_response.json()

            host = file_info["host"]
            path = file_info["path"]
            s = file_info["s"]
            ts = file_info["ts"]
            # Signature: md5 hex digest of salt + path without its first
            # character + the ``s`` value.
            salted = f"XGRlBW9FXlekgbPrRHuSiA{path[1:]}{s}"
            sign = md5(salted.encode("utf-8")).hexdigest()
            return f"https://{host}/get-mp3/{sign}/{ts}/{path}"


, .





To download the file we again use aiohttp, and to write it to disk — the aiofile library.



    async def download_file(cls, url: str, filename: str):
        """Fetch ``url`` with a GET request and write the body to ``filename``.

        The whole response body is read into memory before being written;
        the file is opened in binary write mode, so existing content is
        replaced.

        NOTE(review): the first parameter is named ``cls``; presumably a
        ``@classmethod`` decorator sits above this definition — confirm.
        """
        async with aiohttp.ClientSession() as http, \
                http.request(method="GET", url=url) as reply:
            payload = await reply.read()
            async with AIOFile(filename, "wb") as out:
                await out.write(payload)
                await out.fsync()


. , , mp3, , , . , , . , . .





, . , , . , , . (- mp3 ).



    async def download_artist(self, artist: Artist, depth: Depth = Depth.NORMAL):
        """Download the albums of ``artist`` into ``self.target_dir``.

        Files are laid out as ``<artist>/<year> - <album>/<NN>. <track>.mp3``
        with an optional ``cover.jpg`` per album. Two tqdm bars report
        progress: one over the albums and one over the items of the album
        currently being processed.

        Args:
            artist: Artist whose albums are requested from ``self.api``.
            depth: How thoroughly existing files are re-checked. Below
                ``Depth.ALBUMS`` an album whose directory already exists is
                skipped; from ``Depth.TRACKS`` upwards tracks are downloaded
                even when the target file already exists.
        """
        albums_bar = tqdm(total=0, desc=artist.title, position=1, ascii=True)
        albums = await self.api.get_artist_albums(artist.id)
        albums_bar.total = len(albums)
        albums_bar.refresh()
        for album in albums:
            album_dir = os.path.join(
                self.target_dir,
                normalize(artist.title),
                f"{album.year} - {normalize(album.title)}",
            )
            if depth < Depth.ALBUMS and os.path.exists(album_dir):
                # Shallow check: an existing directory counts as a done album.
                albums_bar.update()
                continue

            items_bar = tqdm(total=0, desc=f"> {album.title}", position=0, ascii=True)
            tracks = await self.api.get_album_tracks(album.id)
            items_bar.total = len(tracks)
            items_bar.refresh()
            os.makedirs(album_dir, exist_ok=True)

            if album.cover:
                # The cover is one extra unit of work on the album bar.
                items_bar.total += 1
                cover_path = os.path.join(album_dir, "cover.jpg")
                if not os.path.exists(cover_path):
                    await self.download_file(album.cover, cover_path)
                items_bar.update()

            for track in tracks:
                track_path = os.path.join(album_dir, f"{track.num:02d}. {normalize(track.title)}.mp3")
                needs_download = depth >= Depth.TRACKS or not os.path.exists(track_path)
                if needs_download:
                    track_url = await self.api.get_track_url(track.album_id, track.id)
                    await self.download_file(track_url, track_path)
                    tags = {
                        "title": track.title,
                        "tracknumber": str(track.num),
                        "artist": artist.title,
                        "album": album.title,
                        "date": str(album.year),
                    }
                    self.write_tags(track_path, tags)
                items_bar.update()

            items_bar.close()
            albums_bar.update()
        albums_bar.close()


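Note that some names, for example AC/DC, contain a character that cannot be used in a file name. To deal with this we add the function normalize: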



def normalize(name: str) -> str:
    """Return ``name`` with every ``/`` replaced by ``-``."""
    return "-".join(name.split("/"))


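Note that here the files are downloaded one after another (sequentially), which is not fast. The process can be sped up by downloading in parallel. For that you can use asyncio.Semaphore and asyncio.gather.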

, .





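So that the login and password do not have to be entered on every run, we keep them in a file. The login and password are read from the .credentials file, which has to be created by hand. After a successful authorization the cookie is saved in the .cookie file and reused on later runs.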



def resolve_cookie() -> str:
    """Return the cookie string, logging in only when no saved copy exists.

    Both files are looked up in the parent of the directory containing this
    module. If ``.cookie`` exists, its content is returned as is. Otherwise
    the login and password are read from the ``[yandex]`` section of
    ``.credentials``, ``auth.resolve_cookie`` is called with them, and the
    result is written to ``.cookie`` before being returned.

    Raises:
        Exception: If neither ``.cookie`` nor ``.credentials`` exists.
    """
    module_dir = os.path.dirname(__file__)
    base_dir = os.path.normpath(os.path.join(module_dir, os.path.pardir))

    cookie_file = os.path.join(base_dir, ".cookie")
    if os.path.exists(cookie_file):
        with open(cookie_file, "rt") as saved:
            return saved.read()

    credentials_file = os.path.join(base_dir, ".credentials")
    if not os.path.exists(credentials_file):
        raise Exception(f"""Create \"{credentials_file}\" with content

[yandex]
login=<user_login>
password=<user_password>
""")

    config = configparser.ConfigParser()
    config.read(credentials_file)
    account = config["yandex"]
    cookie = auth.resolve_cookie(account["login"], account["password"])
    with open(cookie_file, "wt") as fresh:
        fresh.write(cookie)
    return cookie


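Finally, we need a way to tell the program what to download and where to put it. For this we use argparse, which parses the command-line arguments.

The parameters are: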



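  • -a (--artist), the Id of the artist; if it is not given, the favorite artists of the logged-in user are downloaded,
  • -o (--output), the output directory; by default — the Music folder in the user's home directory.
  • -d (--depth), the depth of the check for existing files,

    • Value 0 (NORMAL) skips an album if its directory already exists,
    • Value 1 (ALBUMS) loops through all the tracks in the album and downloads the missing ones,
    • Value 2 (TRACKS) downloads and overwrites tracks even if they are already present in the file system.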


def _extract_login(cookie: str) -> str:
    """Return the value of the ``yandex_login`` cookie from a cookie string.

    The name must start the string or follow a ``;`` or whitespace, so a
    longer cookie name ending in ``yandex_login`` is not picked up, and the
    value does not need a trailing ``;`` (it may be the last cookie).

    Raises:
        Exception: If the string contains no ``yandex_login`` cookie.
    """
    found = re.search(r"(?:^|[;\s])yandex_login=([^;]*)", cookie)
    if found is None:
        raise Exception("yandex_login is missing from the cookie")
    return found.group(1).strip()


async def main():
    """Parse the command line and download one artist or the user's favorites.

    With ``-a/--artist`` the given artist is downloaded; otherwise the login
    is taken from the ``yandex_login`` cookie and ``download_favorites`` is
    called for it. ``-o/--output`` selects the target directory and
    ``-d/--depth`` the depth of the check for existing files.

    Raises:
        Exception: If no artist is given and the cookie has no
            ``yandex_login`` entry.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-a", "--artist", help="Artist ID")
    parser.add_argument("-o", "--output", default=f"{Path.home()}/Music",
                        help=f"Output directory, default {Path.home()}/Music")
    parser.add_argument("-d", "--depth", default=0, type=int,
                        help=f"Exists files check depth, {enum_print(Depth)}")
    args = parser.parse_args()
    cookie = resolve_cookie()
    api = YandexMusicApi(cookie)
    agent = YandexMusicAgent(api, args.output)
    if args.artist:
        artist = await api.get_artist(args.artist)
        await agent.download_artist(artist, args.depth)
    else:
        # The cookie string is joined with "; ", so yandex_login may be the
        # last entry and have no trailing ";".
        login = _extract_login(cookie)
        await agent.download_favorites(login, args.depth)


And now, finally, we can run it all:



# Script entry point: run the main() coroutine when executed directly.
if __name__ == "__main__":
    asyncio.run(main())


Thank you for your attention. Now you know how to implement an API that does not exist, download the non-downloadable, and become the proud owner of your own music collection.



The result can be seen in the yandex.music.agent repository




All Articles