Source code for mendevi.download.torrent_finder
"""Search for available torrent files."""
import pathlib
import shutil
import requests
from context_verbose import Printer
from mendevi.utils import get_project_root
[docs]
def get_torrent(name: str) -> pathlib.Path:
    """Retrieve a torrent from its full name.

    The torrent is searched for in three places, in this order:

    1. the user cache directory ``~/.cache/mendevi``,
    2. the local torrents reported by ``probe_local_torrent``,
    3. the online torrents reported by ``probe_online_torrent``.

    Whatever its origin, the returned file is always located in the cache directory.

    Parameters
    ----------
    name : str
        The full torrent name, for example "multithread.db.xz.torrent".

    Returns
    -------
    path : pathlib.Path
        The full path of the torrent.

    Raises
    ------
    KeyError
        If the requested torrent is not included in the list of possible torrents.
    requests.exceptions.HTTPError
        If the server answers the download request with an error status.
    """
    assert isinstance(name, str), name.__class__.__name__

    # look in cachedir
    cachedir = pathlib.Path.home() / ".cache" / "mendevi"
    cachedir.mkdir(mode=0o777, parents=True, exist_ok=True)
    if (file := cachedir / name).exists():
        return file

    # look on the local cloned mendevi git
    local = {p.name: p for p in probe_local_torrent()}
    if name in local:
        # copy in mendevi cachedir for transmission permission
        shutil.copy(local[name], file)
        return file

    # download online
    with Printer(f"Download {name!r}...", color="green") as prt:
        # look at the available list
        prt.print("get url")
        online = probe_online_torrent()
        if name not in online:
            msg = f"{name!r} not in {', '.join(sorted(set(local) | set(online)))}"
            raise KeyError(msg)

        # download torrent file
        prt.print(f"download {online[name]}")
        req = requests.get(online[name], timeout=60)
        req.raise_for_status()
        # ``req.content`` is the body after Content-Encoding (gzip/deflate) decoding,
        # whereas ``req.raw.data`` would hand back the still-compressed transport bytes
        torrent_data = req.content
        prt.print(f"{len(torrent_data)} bytes retrieved")
        assert torrent_data, "the downloaded torrent is empty"

        # write the file
        file.write_bytes(torrent_data)
    return file
[docs]
def probe_online_torrent() -> dict[str, str]:
    """Search on GitLab online for the names of available torrents.

    The ``dataset`` folder of the mendevi repository is listed through the GitLab REST API.
    The listing is paginated by the server, so every page is fetched
    by following the ``Link: rel="next"`` response header.

    Returns
    -------
    torrents : dict
        For each torrent name, provide the URL for downloading it.

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers one of the listing requests with an error status.

    Examples
    --------
    >>> from pprint import pprint
    >>> from mendevi.download.torrent_finder import probe_online_torrent
    >>> pprint(sorted(probe_online_torrent()))
    ['colorspace.db.xz.torrent',
     'ctc.db.xz.torrent',
     'duration.db.xz.torrent',
     'multithread.db.xz.torrent',
     'ramdisk.db.xz.torrent',
     'svtav1_vs_rav1e_vs_aom.db.xz.torrent',
     'x264_vs_openh264.db.xz.torrent']
    >>>
    """
    # ask for the biggest page the API allows (the default is only 20 entries)
    url = (
        "https://gitlab.inria.fr/api/v4/projects/rrichard%2Fmendevi/repository/tree"
        "?path=dataset&per_page=100"
    )
    torrents: dict[str, str] = {}
    while url:
        req = requests.get(url, timeout=60)
        req.raise_for_status()
        torrents.update(
            {
                f["name"]: f"https://gitlab.inria.fr/rrichard/mendevi/-/raw/main/{f['path']}"
                for f in req.json() if f["name"].endswith(".torrent")
            },
        )
        # the server advertises the following page, if any, in the Link header
        url = req.links.get("next", {}).get("url")
    return torrents
[docs]
def probe_local_torrent() -> set[pathlib.Path]:
    """Collect the torrent files shipped in the local mendevi repository.

    The files matching ``*.torrent`` are looked for in the ``dataset`` folder
    located next to the project root directory.

    Returns
    -------
    torrents : set[pathlib.Path]
        The path of each local torrent file found.

    Examples
    --------
    >>> from pprint import pprint
    >>> from mendevi.download.torrent_finder import probe_local_torrent
    >>> pprint(sorted(t.name for t in probe_local_torrent()))
    ['colorspace.db.xz.torrent',
     'ctc.db.xz.torrent',
     'duration.db.xz.torrent',
     'multithread.db.xz.torrent',
     'ramdisk.db.xz.torrent',
     'svtav1_vs_rav1e_vs_aom.db.xz.torrent',
     'x264_vs_openh264.db.xz.torrent']
    >>>
    """
    dataset_dir = get_project_root().parent / "dataset"
    torrent_files = dataset_dir.glob("*.torrent")
    return set(torrent_files)