streamtuner2: Artifact [a6099b9e41]

Artifact a6099b9e415c072fa5aed789ae697fba2b5d193e:

File channels/internet_radio.py — part of check-in [cfb8433564] at 2024-08-16 20:45:23 on branch trunk — quick fix for changed ?server= url wrapper (user: mario, size: 9169) [annotate] [blame] [check-ins using]
# api: streamtuner2
# title: Internet-Radio
# description: Broad list of webradios from all genres.
# type: channel
# category: radio
# version: 1.6
# url: http://www.internet-radio.com/
# config:
#    { name: internetradio_max_pages,  type: int,  value: 5,  category: limit,  description: How many pages to fetch and read. }
# priority: standard
# png:
#   iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABHNCSVQICAgIfAhkiAAAAaZJREFUOI2N0j1PlEEUBeBnlsVoRJcCPwINxIJGAoWVFbVK4g8w
#   oUUTFRNbG3+FhVErK36BhcYCrTR8FS4mRGUXFEKCUizRwLXgnfV1Awk3mUzmnHPvPXNnUkSE40RKFYxhHKMYxFb1GIlnMLVN/etBUjuGWDm6wEHHyQbLW/Qd
#   JTu8QEq9mJlnogz3sHOJV3iHz2iKzuDiH+bm+J3XD74EU5Gc7pSn/4aYUi14s8BIhkZ5puKhvdgpNFVccaA5oaP7TO66SCuYKnG9weMmjaz5yadqqfvkPE/z
#   8TLTIp4U3I01ljY5f/gQu1LPGvWS7Rel5NtLzOzRlfk+Ngd4i48Ke9PZVpNGUCvwawvsZm6db8GtoLt9s4iIFotZFEwXybU1VjO+z4egv/MVKlIaqTMMJ2nh
#   eeH0wQYX4BwbiQkRTSmdktJ1KY3lGYznuw3zWsS2lLo2WMl4P49ycjCbn3k0pfuVg5m1432e4zr9UGMbLwv8avmP/OJOBQOlAsvF3hYNMititzg2Kuxn7iyr
#   VbSG/tltFHt3CVvATRBRH0lpEvfwXXL3L9zE/NEe0EfHAAAAAElFTkSuQmCC
# extraction-method: regex, dom
#
# Internet-Radio.co.uk/.com is one of the largest stream directories.
# Available music genre classifications are mirrored verbatim and flatly.
#
# The new version of this plugin alternates between PyQuery and Regex
# station extraction. Both overlook some paid or incomplete entries.
# HTTP retrieval happens in one batch, determined by the number of pages
# setting, rather than the global max_streams option.
#


from channels import *
import re
from config import *
import ahttp
from pq import pq




# streams and gui
class internet_radio (ChannelPlugin):

    # control data
    listformat = "pls"
    categories = []
    base_url = "https://www.internet-radio.com/"
    has_search = True


    # load genres
    def update_categories(self):
    
        html = ahttp.get(self.base_url)
        rx = re.compile("""="/stations/[-+&.\w\s%]+/">([^<]+)<""")
        cats = rx.findall(html)
        cats = list(set(cats))
        cats = [s.capitalize() for s in cats]
        self.categories = sorted(list(set(cats)))


    # fetch station lists
    def update_streams(self, cat, search=None):
    
        entries = []
        if not search and cat not in self.categories:
            return []

        rx_pages = re.compile('href="/stations/[-+\w%\d\s]+/page(\d+)">\d+</a>')

        # Fetch multiple pages at once
        html = []
        max_pages = max(int(conf.internetradio_max_pages), 1)
        for page in range(1, max_pages):
        
            # Append HTML source
            if search:
                html.append(
                    ahttp.get("%ssearch/?radio=%s%s" % (self.base_url, search, "&page=%s" % page if page>1 else ""))
                )
            else:
                html.append(
                    ahttp.get("%sstations/%s/%s" % (self.base_url, cat.lower().replace(" ", "%20"), "page%s" % page if page>1 else ""))
                )

            # Is there a next page?
            if str(page+1) not in rx_pages.findall(html[-1]):
                break
            self.parent.status(float(page)/float(max_pages+1), timeout=1)

        # Alternatively try regex or pyquery parsing
        #log.HTTP(html)
        entries = self.from_html(html)
            
        # fin
        log.FINISHED("internet_radio.update_streams")
        return entries


    # Switch update method
    @use_rx
    def from_html(self, html, use_rx):
        if use_rx:
            return self.with_regex(html)
        else:
            return self.with_dom(html)


    # Regex extraction
    def with_regex(self, html):
        log.PROC("internet-radio, regex")
        r = []
        html = "\n".join(html)
        
        # Break up into <tr> blocks before extracting bits
        rx_tr = re.compile("""<tr[^>]*>(.+?)</tr>""", re.S)
        rx_data = re.compile(r"""
               \?server=(https?://[^'">]+)
               .*?   <h4.*?>([^<>]+)</
               .*?   <b>([^<>]*)</b>
         (?:   .*?   href="(.*?)"        )?
         (?:   .*?   Genres:((?:</?a[^>]+>|\w+|\s+)+)    )?
               .*?   (\d+)\s*Listeners
               .*?   (\d+)\s*Kbps
        """, re.S|re.X)

        for div in rx_tr.findall(html):
            if div.find('id="pagination"') < 0:
                #log.DATA(len(div))
                uu = rx_data.search(div)
                if uu:
                    (url, title, playing, homepage, genres, listeners, bitrate) = uu.groups()
                    
                    # transform data
                    r.append({
                        "url": url,
                        "genre": strip_tags(genres or ""),
                        "homepage": ahttp.fix_url(homepage or ""),
                        "title": nl(title or ""),
                        "playing": nl(playing or ""),
                        "bitrate": int(bitrate or 0),
                        "listeners": int(listeners or 0),
                        "format": "audio/mpeg", # there is no stream info on that, but internet-radio.org.uk doesn't seem very ogg-friendly anyway, so we assume the default here
                    })
                else:
                    log.DATA("Regex couldn't decipher entry:", div)
        return r

    """
    [HTML] <tr>
			<td id="play_nohtml5_1" style="display: none;" width="74">
				<a title="PLS Playlist File" style="margin:1px" href="/servers/tools/playlistgenerator/?u=http://95.211.206.80:8000/listen.pls?sid=1&amp;t=.pls">
					<i class="text-danger mdi-av-play-circle-outline"/> .pls
				</a>
				<br/>
				<a title="M3U Playlist File" style="margin:1px" href="/servers/tools/playlistgenerator/?u=http://95.211.206.80:8000/listen.pls?sid=1&amp;t=.m3u">
					<i class="text-danger mdi-av-play-circle-outline"/> .m3u
				</a>
				<br/>
				<small>
					<a href="/community/threads/why-is-no-web-player-available.51536/" target="_blank">
						No Web Player
						<i class="text-danger mdi-action-help"/>
					</a>
				</small>
			</td>
			<td id="play_1" width="74" class="text-center">
<a title="Play in new window" style="cursor: pointer; margin:1px; text-decoration: none;" onclick="window.open('http://player.internet-radio.com/?server=http://95.211.206.80:8000','PopupPlayer','width=450,height=150');"><i style="font-size: 60px;" class="mdi-av-play-circle-outline"/></a><br/><small class="hidden-xs"><a title="PLS Playlist File" style="margin:1px" href="/servers/tools/playlistgenerator/?u=http://95.211.206.80:8000/listen.pls?sid=1&amp;t=.pls">.pls</a> <a title="M3U Playlist File" style="margin:1px" href="/servers/tools/playlistgenerator/?u=http://95.211.206.80:8000/listen.pls?sid=1&amp;t=.m3u">.m3u</a><br/><a style="cursor: pointer;" title="Play in new window" onclick="window.open('http://player.internet-radio.com/?server=http://95.211.206.80:8000','PopupPlayer','width=450,height=150');">popup <i style="font-size: 14px;" class="mdi-action-launch"/></a>			</small></td>
			<td>
				 <h4 class="text-danger" style="display: inline;">Radio Acacia</h4>
				<br/>
				<b>Nu Live Met Dj Joop En Dj Hansie veel luister plezier</b><br/>
				<a class="small text-success" href="http://www.radioacacia.nl" target="_blank">http://www.radioacacia.nl</a>

				<br/>Genres: various nederlands piraat hollands piraten duits engels polka <a href="/stations/folk/">folk</a>
			</td>
			<td width="120" class="text-right hidden-xs">
				<p style="margin-bottom: 4px;">
				82 Listeners<br/>
				<span title="High Quality"><i class="listingicon mdi-av-high-quality"/></span> 192 Kbps<br/>
				</p>
				<p style="margin: 0;">
				<br/><br/>
				</p>
			</td>
		</tr>

    """

    # DOM traversing
    def with_dom(self, html_list):
        log.PROC("internet-radio, dom")
        rx_numbers = re.compile("(\d+)")
        r = []
        for html in html_list:
            # the streams are arranged in table rows
            doc = pq(html)
            for dir in (pq(e) for e in doc("tr")):
                #log.HTML(dir)
                
                # bitrate/listeners
                bl = dir.find("p")
                if bl:
                    bl = rx_numbers.findall(str(bl.text()) + " 0 0")
                else:
                    bl = [0, 0]
                
                # stream url
                url = dir.find("a[onclick]").attr("onclick")
                if url:
                    url = re.search("(https?://[^\'\"\>]+)", url)
                    if url:
                        url = url.group(0)
                    else:
                        url = ""
                else:
                    url = ""
                
                row = {
                    "title": dir.find("h4").text(),
                    "homepage": ahttp.fix_url(dir.find("a.small").attr("href") or ""),
                    "url": url,
                    "genre": dir.find("a[href^='/stations/']").text() or "",
                    "listeners": int(bl[0]),
                    "bitrate": int(bl[1]),
                    "format": "audio/mpeg",
                    "playing": dir.find("b").text(),
                }
                #log.DATA(row)
                r.append(row)
        return r