⌈⌋ branch:  freshcode


Check-in [dedb921ea3]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Fixed title/description extraction.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | trunk
Files: files | file ages | folders
SHA1:dedb921ea3352cc5a72c7167cea56f8124e208bc
User & Date: mario 2019-07-22 01:14:08
Context
2019-07-22
01:14
Fixed title/description extraction. Leaf check-in: dedb921ea3 user: mario tags: trunk
2018-06-07
21:35
Add freshermeat, repology to /links page; aux: more current spam keywords. check-in: 4420f00d61 user: mario tags: trunk
Changes

Changes to cron.daily/news_fossies.php.

1
2
3
4
5
6
7
8
9
10
11
12
..
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
..
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
<?php
/**
 * title: Fossies.org extraction
 * description: Retrieves from fossies.org and stores as sidebar feed .html.
 * version: 0.5
 * category: template
 * api: cli
 * type: cron
 * x-cron: 11 *\/4 * * * 
 *
 * Scrapes from fossies.org/linux/misc/index_n.html,
 * extracts title, version, time, and description.
................................................................................

// switch to webroot
// (dirname(__DIR__) is the parent of this script's directory; the relative
// paths used below are resolved against it)
chdir(dirname(__DIR__));


// Fossies
// NOTE(review): shared.phar presumably provides the curl() wrapper and the
// curl class used below — confirm.
include("./shared.phar");
// User-Agent string sent with the request
curl::$defaults["useragent"] = "freshcode/0.6 (Linux x86-64; curl) projects-autoupdate/0.5 (screenshots,changelog,regex,xpath) +http://freshcode.club/";
// `and` binds looser than `=`, so $html receives the exec() result itself.
// The block runs only when that result is truthy and at least one
// <TR>...</TR> row was found; the matched rows end up in $line[0].
if ($html = curl("http://fossies.org/linux/misc/index_n.html")->exec()
and preg_match_all("~<TR>.+?</TR>~s", $html, $line))
{
    // feed markup is accumulated here and written out once at the end
    $output = "";
    # <TR><TD VALIGN="top"><A HREF="openmpi-1.8.2.tar.gz"><IMG SRC="/dl.gif"
    # class="dl" title="[Download]" ALT=""></A></TD><TD> <A
    # HREF="openmpi-1.8.2.tar.gz/" title="Contents, browsing \&amp; more
................................................................................

        // package name and version
        // NOTE(review): $html is presumably a single <TR> row at this point; the
        // loop header is not visible in this excerpt — confirm.
        // The HREF must look like "<name>-<version>" followed by any number of
        // archive extensions and a closing "/". Group 1 is the name, group 2 the
        // version (which has to start with a digit).
        preg_match("~HREF=\"([\w-]+?)-(\d[\w._-]+?)(\.(zip|tar|gz|xz|bz2|pax|tgz|txt|tbz2|7z|exe))*/\"~", $html, $pkg);
        // fewer than three entries means the pattern did not match: skip the row
        if (count($pkg) < 3) { continue; }
        // $pkg is reused for the name; the full match and extension groups are dropped
        list(, $pkg, $ver, ) = $pkg;

        // package title
        // First text run that is immediately followed by an upper-case </A>
        // (the pattern has no `i` modifier). The result is used without
        // checking whether the match succeeded.
        preg_match("~>([^<>]+)</A>~", $html, $title);
        $title = $title[1];

        // convert date string
        // Captures "<day> <3-char month>" that follows "(" and precedes "hh:mm",
        // then reformats it as day/month.
        preg_match("~\((\d+ \w\w\w) \d\d:\d\d~", $html, $date);
        $date = strftime("%d/%m", strtotime($date[1]));
        
        // description
        // Text between an </A> (after optional whitespace/dashes) and </DIV>,
        // entity-encoded because it is placed inside a title="" attribute below.
        preg_match("~</A>[\s-]*([^<>]+)</DIV>~", $html, $desc);
        $desc = htmlentities($desc[1]);
                
        // combine
        // One anchor per package; $title and $ver are interpolated as extracted,
        // without further escaping.
        $output .= "<a href=\"http://fossies.org/$pkg\" title=\"$desc\">"
                .  "<small>$date</small> $title <em>$ver</em></a>\n";
    }

    // save
    // Overwrites the feed file (path relative to the directory chdir()'d into
    // at the top of the script) with the collected links.
    file_put_contents("./template/feed.fossies.htm", $output);
}





|







 







|







 







|







|











1
2
3
4
5
6
7
8
9
10
11
12
..
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
..
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
<?php
/**
 * title: Fossies.org extraction
 * description: Retrieves from fossies.org and stores as sidebar feed .html.
 * version: 0.6
 * category: template
 * api: cli
 * type: cron
 * x-cron: 11 *\/4 * * * 
 *
 * Scrapes from fossies.org/linux/misc/index_n.html,
 * extracts title, version, time, and description.
................................................................................

// switch to webroot
// (dirname(__DIR__) is the parent of this script's directory; the relative
// paths used below are resolved against it)
chdir(dirname(__DIR__));


// Fossies
// NOTE(review): shared.phar presumably provides the curl() wrapper and the
// curl class used below — confirm.
include("./shared.phar");
// User-Agent string sent with the request
curl::$defaults["useragent"] = "freshcode/0.8 (Linux x86-64; curl) projects-autoupdate/0.5 (screenshots,changelog,regex,xpath) +http://freshcode.club/";
// `and` binds looser than `=`, so $html receives the exec() result itself.
// The block runs only when that result is truthy and at least one
// <TR>...</TR> row was found; the matched rows end up in $line[0].
if ($html = curl("http://fossies.org/linux/misc/index_n.html")->exec()
and preg_match_all("~<TR>.+?</TR>~s", $html, $line))
{
    // feed markup is accumulated here and written out once at the end
    $output = "";
    # <TR><TD VALIGN="top"><A HREF="openmpi-1.8.2.tar.gz"><IMG SRC="/dl.gif"
    # class="dl" title="[Download]" ALT=""></A></TD><TD> <A
    # HREF="openmpi-1.8.2.tar.gz/" title="Contents, browsing \&amp; more
................................................................................

        // package name and version
        // The HREF must look like "<name>-<version>" followed by any number of
        // archive extensions and a closing "/". Rows that do not match are skipped.
        if (!preg_match("~HREF=\"([\w-]+?)-(\d[\w._-]+?)(\.(zip|tar|gz|xz|bz2|pax|tgz|txt|tbz2|7z|exe))*/\"~", $html, $pkg)) {
            continue;
        }
        // Read the version before overwriting $pkg: destructuring an array onto
        // itself with list() is documented as undefined behaviour.
        $ver = $pkg[2];
        $pkg = $pkg[1];

        // package title
        // First text run directly followed by a closing anchor tag. Matching is
        // case-insensitive so a change in upstream tag case does not break it.
        // Falls back to the package name when no link text is found. The text
        // comes straight from HTML and is emitted as extracted.
        if (preg_match("~>([^<>]+)</a>~i", $html, $title)) {
            $title = $title[1];
        } else {
            $title = $pkg;
        }

        // convert date string
        // "<day> <3-char month>" after "(" and before "hh:mm" becomes day/month.
        // date("d/m") yields the same two-digit fields as the deprecated
        // strftime("%d/%m"); an unparseable date becomes empty instead of "01/01".
        if (preg_match("~\((\d+ \w\w\w) \d\d:\d\d~", $html, $date)
        and ($date = strtotime($date[1])) !== false) {
            $date = date("d/m", $date);
        } else {
            $date = "";
        }

        // description
        // Text after the closing anchor (skipping whitespace/dashes) up to </DIV>.
        // It goes into a title="" attribute, so quotes are encoded too; entities
        // already present in the scraped HTML are not encoded a second time, and
        // invalid byte sequences are substituted rather than blanking the string.
        if (preg_match("~</a>[\s-]*([^<>]+)</DIV>~i", $html, $desc)) {
            $desc = htmlentities($desc[1], ENT_QUOTES | ENT_SUBSTITUTE, "UTF-8", false);
        } else {
            $desc = "";
        }

        // combine
        // one anchor per package, newline-terminated
        $output .= "<a href=\"http://fossies.org/$pkg\" title=\"$desc\">"
                .  "<small>$date</small> $title <em>$ver</em></a>\n";
    }

    // save
    // Only replace the feed when at least one entry was extracted, so an
    // upstream markup change that makes every row unparseable does not blank
    // the existing sidebar file.
    if ($output !== "") {
        file_put_contents("./template/feed.fossies.htm", $output);
    }
}