⌈⌋ ⎇ branch:  freshcode


Check-in [f29bcd5c41]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Split news feeds and fossies.org scraping.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: f29bcd5c41cd0be1e838579399312a1b9e181ce8
User & Date: mario 2014-11-03 02:10:06
Context
2014-11-03
02:10
Add GitHub/Archive list and cache database polling. check-in: 1534cf257a user: mario tags: trunk
02:10
Split news feeds and fossies.org scraping. check-in: f29bcd5c41 user: mario tags: trunk
2014-10-28
21:16
int typecast for revision date display check-in: f27fa2251e user: mario tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to cron.daily/news_feeds.php.

1
2
3
4
5
6
7
8


9
10
11
12
13
14
15
16
17
18
19
20

21


22
23
24
25
26
27
28
29
30
31
32
33

34
35
36
37
38
39
40
<?php
/**
 * title: Article feeds
 * description: Queries a few online resources for article links
 * version: 0.4
 *
 * Highlights version numbers in news feeds,
 * and populates templates/feed.*.htm for sidebar display.


 *
 */


// switch to webroot
chdir(dirname(__DIR__));


#-- RSS
$feeds = array(
    "reddit" => "http://www.reddit.com/r/linux/.rss",
    "linuxcom" => "http://www.linux.com/news/software?format=feed&type=rss",

    "linuxgames" => "http://www.linuxgames.com/feed",


    "sourceforge" => "http://sourceforge.net/directory/release_feed/",
    "distrowatch" => "http://distrowatch.com/news/dwd.xml",
    "beopen" => "http://beopen.bplaced.net/category/projects/feed/",
);
$filter = 
    "/Please 'report' off-topic|namelessrom|machomebrew/"
;

#-- Traverse and collect entries
foreach ($feeds as $name=>$url) {

    // data

    $output = "";
    $x = file_get_contents($url);
    $x = preg_replace("/[^\x20-\x7F\s]/", "", $x);
    $x = simplexml_load_string($x);
    
    // append
    $i = 0;




|



>
>










<
|
>
|
>
>
|
|
|









>







1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20

21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
<?php
/**
 * title: Article feeds
 * description: Queries a few online resources for article links
 * version: 0.5
 *
 * Highlights version numbers in news feeds,
 * and populates templates/feed.*.htm for sidebar display.
 *
 * Some of the collected entries (*games) are displayed together in sidebar blocks.
 *
 */


// switch to webroot
chdir(dirname(__DIR__));


#-- RSS
#   Feed table. Each key is "name,max": the part before the comma names the
#   output file (./template/feed.<name>.htm), the number after it is the
#   maximum count of entries collected for that feed. Each value is the
#   feed URL that gets fetched.
$feeds = array(
    "linuxcom,7"      => "http://www.linux.com/news/software?format=feed&type=rss",
    "reddit,17"       => "http://www.reddit.com/r/linux/.rss",
    "linuxgames,5"    => "http://www.linuxgames.com/feed",
    "gamingonlinux,4" => "http://www.gamingonlinux.com/article_rss.php",
    "freegamer,3"     => "http://freegamer.blogspot.com/feeds/posts/default?alt=rss",
    "sourceforge,22"  => "http://sourceforge.net/directory/release_feed/",
    "distrowatch,15"  => "http://distrowatch.com/news/dwd.xml",
    "beopen,7"        => "http://beopen.bplaced.net/category/projects/feed/",
);

#   Regex for unwanted entries.
#   NOTE(review): presumably matched against entries inside the loop below;
#   the code using it is not visible here -- confirm.
$filter = "/Please 'report' off-topic|namelessrom|machomebrew/";

#-- Traverse and collect entries
foreach ($feeds as $name=>$url) {

    // data
    list($name, $max) = str_getcsv($name);
    $output = "";
    $x = file_get_contents($url);
    $x = preg_replace("/[^\x20-\x7F\s]/", "", $x);
    $x = simplexml_load_string($x);
    
    // append
    $i = 0;
73
74
75
76
77
78
79
80
81
82
83

84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
                    $title = preg_replace("~(\d+\.[\d-.]+)~", "<em>$0</em>", $title);
                    $output .="<a href=\"$link\">$title</a>\n";
                    $i++;
                }
                break;
        }

        if ($i >= 20) { break; }
    }
    
    // save

    file_put_contents("./template/feed.$name.htm", $output);

}


#-- Scraping

// Fossies
include("./shared.phar");
curl::$defaults["useragent"] = "freshcode/0.6 (Linux x86-64; curl) projects-autoupdate/0.5 (screenshots,changelog,regex,xpath) +http://freshcode.club/";
if ($html = curl("http://fossies.org/linux/misc/index_n.html")->exec()
and preg_match_all("~<TR>.+?</TR>~s", $html, $line))
{
    $output = "";
    # <TR><TD VALIGN="top"><A HREF="openmpi-1.8.2.tar.gz"><IMG SRC="/dl.gif"
    # class="dl" title="[Download]" ALT=""></A></TD><TD> <A
    # HREF="openmpi-1.8.2.tar.gz/" title="Contents, browsing \&amp; more
    # ..."><B>openmpi-1.8.2.tar.gz</B></A> (25 Aug 19:39, 19779476 Bytes) <IMG
    # SRC="/warix/new1.gif" class="new_nb" ALT="*NEW*"><BR><DIV class="desc"><A
    # HREF="http://www.open-mpi.org/">Open&nbsp;MPI</A> - A High Performance
    # Message Passing Library.  Open MPI is a project combining technologies and
    # resources from several other projects (FT-MPI, LA-MPI, LAM/MPI, and
    # PACX-MPI) in order to build the best MPI library available. 
    # </DIV></TD></TR>
    foreach (array_slice($line[0], 0, 22) as $html) {

        // package name and version
        preg_match("~HREF=\"([\w-]+?)-(\d[\w._-]+?)(\.(zip|tar|gz|xz|bz2|pax|tgz|txt|tbz2|7z|exe))*/\"~", $html, $pkg);
        if (count($pkg) < 3) { continue; }
        list(, $pkg, $ver, ) = $pkg;

        // package title
        preg_match("~>([^<>]+)</A>~", $html, $title);
        $title = $title[1];

        // convert date string
        preg_match("~\((\d+ \w\w\w) \d\d:\d\d~", $html, $date);
        $date = strftime("%d/%m", strtotime($date[1]));
        
        // description
        preg_match("~</A>[\s-]*([^<>]+)</DIV>~", $html, $desc);
        $desc = htmlentities($desc[1]);
                
        // combine
        $output .= "<a href=\"http://fossies.org/$pkg\" title=\"$desc\">"
                .  "<small>$date</small> $title <em>$ver</em></a>\n";
    }

    // save
    file_put_contents("./template/feed.fossies.htm", $output);
}








|



>





<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94















































                    $title = preg_replace("~(\d+\.[\d-.]+)~", "<em>$0</em>", $title);
                    $output .="<a href=\"$link\">$title</a>\n";
                    $i++;
                }
                break;
        }

        if ($i >= $max) { break; }
    }
    
    // save
    strlen($output) and
    file_put_contents("./template/feed.$name.htm", $output);

}

















































Added cron.daily/news_fossies.php.



































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
<?php
/**
 * title: Fossies.org extraction
 * description: Retrieves from fossies.org and stores as sidebar feed .html.
 * version: 0.5
 *
 * Scrapes from fossies.org/linux/misc/index_n.html,
 * extracts title, version, time, and description.
 *
 * Highlights version numbers and date, adds title= description.
 *
 * Stored in ./template/feed.fossies.htm for frontpage sidebar.
 *
 */


// switch to webroot
chdir(dirname(__DIR__));


// Fossies
//
// Downloads the fossies.org "misc" index page, turns the first 22 table rows
// into sidebar links (date, title, highlighted version, description as
// title= tooltip) and writes them to ./template/feed.fossies.htm.
include("./shared.phar");
curl::$defaults["useragent"] = "freshcode/0.6 (Linux x86-64; curl) projects-autoupdate/0.5 (screenshots,changelog,regex,xpath) +http://freshcode.club/";
if ($html = curl("http://fossies.org/linux/misc/index_n.html")->exec()
and preg_match_all("~<TR>.+?</TR>~s", $html, $line))
{
    $output = "";
    # Sample row:
    # <TR><TD VALIGN="top"><A HREF="openmpi-1.8.2.tar.gz"><IMG SRC="/dl.gif"
    # class="dl" title="[Download]" ALT=""></A></TD><TD> <A
    # HREF="openmpi-1.8.2.tar.gz/" title="Contents, browsing \&amp; more
    # ..."><B>openmpi-1.8.2.tar.gz</B></A> (25 Aug 19:39, 19779476 Bytes) <IMG
    # SRC="/warix/new1.gif" class="new_nb" ALT="*NEW*"><BR><DIV class="desc"><A
    # HREF="http://www.open-mpi.org/">Open&nbsp;MPI</A> - A High Performance
    # Message Passing Library.  Open MPI is a project combining technologies and
    # resources from several other projects (FT-MPI, LA-MPI, LAM/MPI, and
    # PACX-MPI) in order to build the best MPI library available.
    # </DIV></TD></TR>
    foreach (array_slice($line[0], 0, 22) as $row) {

        // package name and version; rows without a recognizable
        // "name-version.ext/" link (e.g. table headers) are skipped
        if (!preg_match("~HREF=\"([\w-]+?)-(\d[\w._-]+?)(\.(zip|tar|gz|xz|bz2|pax|tgz|txt|tbz2|7z|exe))*/\"~", $row, $m)) {
            continue;
        }
        // separate match array, so list() never unpacks a variable into itself
        list(, $pkg, $ver) = $m;

        // package title: first non-empty link text; falls back to the
        // package name if the row carries no such link
        $title = preg_match("~>([^<>]+)</A>~", $row, $m) ? $m[1] : $pkg;

        // convert date string ("25 Aug" -> "25/08"); left empty if the row
        // has no parsable timestamp instead of printing an epoch date
        $date = "";
        if (preg_match("~\((\d+ \w\w\w) \d\d:\d\d~", $row, $m)) {
            $ts = strtotime($m[1]);
            if ($ts !== false) {
                // date("d/m") yields the same digits as strftime("%d/%m"),
                // without relying on the deprecated strftime()
                $date = date("d/m", $ts);
            }
        }

        // description, used as tooltip; empty if the row has none
        // NOTE(review): the scraped text may already contain HTML entities,
        // which htmlentities() would encode a second time -- confirm.
        $desc = preg_match("~</A>[\s-]*([^<>]+)</DIV>~", $row, $m) ? htmlentities($m[1]) : "";

        // combine
        $output .= "<a href=\"http://fossies.org/$pkg\" title=\"$desc\">"
                .  "<small>$date</small> $title <em>$ver</em></a>\n";
    }

    // save; as in news_feeds.php, keep the previous file when nothing
    // could be extracted (e.g. after a page layout change)
    strlen($output) and
    file_put_contents("./template/feed.fossies.htm", $output);
}

Changes to template/index_sidebar.php.

32
33
34
35
36
37
38

39
40

41
42
43
44
45
46
47
48
49
50
51





52
53
54

    <section class="article-links untrimmed">
        <h5>DistroWatch</h5>
        <?php  include("template/feed.distrowatch.htm");  ?>
    </section>

    <section class="article-links trimmed">

        <h5>LinuxGames</h5>
        <?php  include("template/feed.linuxgames.htm");  ?>

    </section>

    <section class="article-links untrimmed">
        <h5>Sourceforge Files</h5>
        <?php  include("template/feed.sourceforge.htm");  ?>
    </section>

    <section class="article-links trimmed">
        <h5>beOpen</h5>
        <?php  include("template/feed.beopen.htm");  ?>
    </section>






 </aside>








>
|

>











>
>
>
>
>



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61

    <section class="article-links untrimmed">
        <h5>DistroWatch</h5>
        <?php  include("template/feed.distrowatch.htm");  ?>
    </section>

    <section class="article-links trimmed">
        <h5>Games <a href="http://www.linuxgames.com/" style=display:inline>LG</a>, <a href="http://www.gamingonlinux.com/" style=display:inline>GoL</a>, <a href="http://freegamer.blogspot.com/" style=display:inline>FG</a></h5>
        <?php  include("template/feed.gamingonlinux.htm");  ?>
        <?php  include("template/feed.linuxgames.htm");  ?>
        <?php  include("template/feed.freegamer.htm");  ?>
    </section>

    <section class="article-links untrimmed">
        <h5>Sourceforge Files</h5>
        <?php  include("template/feed.sourceforge.htm");  ?>
    </section>

    <section class="article-links trimmed">
        <h5>beOpen</h5>
        <?php  include("template/feed.beopen.htm");  ?>
    </section>

    <section class="article-links trimmed">
        <h5>GitHub releases</h5>
        <?php  include("template/feed.github.htm");  ?>
    </section>

 </aside>