⌈⌋ ⎇ branch:  freshcode


Artifact [c171210958]

Artifact c17121095864b32efb5730d73004b23ebedddffd:

  • File submit_import.php — part of check-in [6c29fe0c80] at 2014-10-12 04:30:04 on branch trunk — Add implicit SourceForge= url for SF-imports. (user: mario size: 12458)

<?php
/**
 * api: freshcode
 * title: Import project description
 * description: Allow DOAP/JSON/etc. import prior to manual /submit form intake.
 * version: 0.5
 *
 *
 * Checks for uploaded $_FILES or ?import_url=
 *  → Deciphers project name, description, license, tags, etc.
 *  → Passes on extra $data to /submit <form>
 *
 */



// Names of the request fields that project_import::fetch() reads:
// the import scheme, the uploaded description file, and the project name.
const UP_IMPORT_TYPE = "import_via";
const UP_IMPORT_FILE = "import_file";
const UP_IMPORT_NAME = "import_name";



/**
 * Invoked by page_submit itself to populate any empty $data set.
 *
 * Each importer converts one external project description format into a
 * flat list of form fields (name, title, description, homepage, license,
 * tags, ...). Importers return NULL when the input is not recognized;
 * fetch() normalizes that into an empty array.
 *
 */
class project_import {


    /**
     * Evaluate request params, and import data if any.
     *
     * Reads the uploaded file (UP_IMPORT_FILE), the import scheme
     * (UP_IMPORT_TYPE) and the project name (UP_IMPORT_NAME).
     *
     * @param  string|NULL $data  Raw description text; replaced by the uploaded file content if present.
     * @return array  Imported fields, or an empty array if nothing was requested or recognized.
     */
    public static function fetch($data=NULL) {
    
        #-- file upload?
        if (!empty($_FILES[UP_IMPORT_FILE]["tmp_name"])) {
            $data = file_get_contents($_FILES[UP_IMPORT_FILE]["tmp_name"]);
        }
        
        #-- import scheme, and project name
        // NOTE(review): $_REQUEST is accessed via ->id / ->text here, so it is
        // presumably an input-filter wrapper object, not the plain superglobal array.
        $type = $_REQUEST->id[UP_IMPORT_TYPE];
        $name = $_REQUEST->text[UP_IMPORT_NAME];

        if ($type and ($data or $name)) {
            $i = new self;
            // Import is best-effort: warnings from malformed input are muted,
            // and a NULL result is turned into an empty array.
            return (array)@($i->convert($type, $data, $name));
        }
        return array();
    }

    
    /**
     * Dispatch to submodules.
     *
     * @param  string $type  Import scheme name (case-insensitive).
     * @param  string $data  Raw file content, used by the file-based schemes.
     * @param  string $name  Project name, used by the remote-lookup schemes.
     * @return array|NULL  Imported fields; empty array for an unknown scheme.
     */
    public function convert($type, $data, $name) {
    
        #-- switch to fetch methods
        switch (strtoupper($type)) {

            case "JSON":
               return $this->JSON($data);

            case "PKG-INFO":
            case "PKGINFO":
            case "LSM":
            case "DEBIAN":
            case "RPMSPEC":
               return $this->PKG_INFO($data);

            case "DOAP":
               return $this->DOAP($data);

            case "FREECODE":
               return $this->FREECODE($name);

            case "SOURCEFORGE":
               return $this->SOURCEFORGE($name);

            default:
               return array();
        }
    }

    
    
    /**
     * Extract from common JSON formats.
     *
     *   release.json  common.js     package.json  bower.json    composer.json   pypi.json
     *   ------------- ------------- ------------- ------------- --------------- -------------
     *   name          name          name          name          name            name
     *   version       version       version       version       version         version
     *   title                                                                     
     *   description   description   description   description   description     description
     *   homepage      homepage      homepage      homepage      homepage        home_page
     *   license       licenses*     license       license*      license         license
     *   image
     *   state                                                                   classifiers
     *   download                    repository    repository                    download_url
     *   urls*         repositories                              repositories    release_url
     *   tags          keywords      keywords      keywords      keywords        keywords
     *   trove                                                                   classifiers
     *
     * @param  string $data  JSON document text.
     * @return array|NULL  String-valued form fields, or NULL if $data is not a JSON object/array.
     */
    public function JSON($data) {
    
        // check if it is actually json (scalar documents cannot carry fields)
        $data = json_decode((string)$data, TRUE);
        if (!$data or !is_array($data)) {
            return NULL;
        }


        // rename a few plain fields, unless the target field is already present
        $map = array(
            "name" => "title",            // title is commonly absent
            "screenshot" => "image",
            "home_page" => "homepage",    // pypi
            "download_url" => "download", // pypi
            "summary" => "description",   // pypi
            "release_url" => "urls",      // pypi
        );
        foreach ($map as $old=>$new) {
            if (empty($data[$new]) and !empty($data[$old])) {
                $data[$new] = $data[$old];
            }
        }


        // complex mapping (order matters: later entries may overwrite earlier results)
        $map = array(
             "keywords" => "tags",
             "classifiers" => "tags",
             "licenses" => "license",
             "license" => "license",
             "repository" => "urls",
             "repositories" => "urls",
             "urls" => "urls",
        );
        foreach ($map as $old=>$new) {
            if (empty($data[$old])) {
                continue;
            }
            switch ($old) {

                // keywords (common.js, composer.json) become tags;
                // some formats give a plain string instead of a list
                case "keywords":
                    $keywords = is_array($data[$old])
                              ? join(", ", array_filter($data[$old], "is_scalar"))
                              : strval($data[$old]);
                    $data[$new] = strtolower($keywords);
                    break;

                // Trove classifiers (pypi)
                case "classifiers":
                    $data[$new] = tags::trove_to_tags($data[$old]);
                    break;

                // license alias  // see spdx.org
                case "licenses":
                case "license":
                    // unwrap nested lists/objects down to their first scalar entry
                    while (is_array($data[$old])) {
                        $data[$old] = current($data[$old]);
                    }
                    $data[$new] = tags::map_license($data[$old]);
                    break;

                // URLs: a single {type, url} object (string shorthands are skipped)
                case "repository":
                    if (is_array($data[$old]) and isset($data[$old]["url"]) and is_string($data[$old]["url"])) {
                        $label = (isset($data[$old]["type"]) and is_scalar($data[$old]["type"]))
                               ? $data[$old]["type"] : $old;
                        $data[$new] = $label . "=" . $data[$old]["url"] . "\n";
                    }
                    break;
                case "repositories":
                    $data[$new] = self::url_lines($data[$old], "type");
                    break;
                case "urls":
                    // an already textual urls field is kept untouched
                    if (is_array($data[$old])) {
                        $data[$new] = self::url_lines($data[$old], "packagetype");
                    }
                    break;
            }
        }
        

        // common fields from releases.json are just kept asis
        $asis = array(
            "name", "title", "homepage", "description",
            "license", "tags", "image", "version", "state",
            "scope", "changes", "download", "urls",
            "autoupdate_module", "autoupdate_url", "autoupdate_regex",
            "submitter", "lock",
        );

        // done: only known fields with plain string values are passed on
        return array_filter(
            array_intersect_key($data, array_flip($asis)),
            "is_string"
        );
    }



    /**
     * Extracts from PKG-INFO and other RFC822-style text files.
     *
     *  used   PKG-INFO       LSM            Debian        RPMSpec
     *  ----   -------------  -------------  ------------  -------
     *   →     Name           Title          Package       Name
     *   →     Version        Version        Version       Version
     *   →     Description    Description    Description   
     *   →     Summary                                     Summary
     *   →     Home-Page      Primary-Site   Homepage      URL
     *         Author         Author                       Vendor
     *   →     License        Coding-Policy                Copyright
     *   →     Keywords       Keywords       Section       Group
     *         Classifiers                                 
     *   →     Platform       Platforms                    
     *
     *  [1] http://legacy.python.org/dev/peps/pep-0345/
     *  [2] http://lsm.execpc.com/LSM.README
     *  [3] http://www.debian.org/doc/debian-policy/ch-controlfields.html
     *  [4] http://www.rpm.org/max-rpm/s1-rpm-build-creating-spec-file.html
     *
     * @param  string $data  Text of the control/info file.
     * @return array|NULL  Form fields, or NULL if neither a description nor a summary was found.
     */
    public function PKG_INFO($data) {
    
        // Simple KEY: VALUE format (value may span multiple lines).
        // Single-quoted on purpose, so PHP leaves the PCRE escapes alone.
        // Continuation lines are recognized by leading horizontal whitespace;
        // \h* after the colon keeps an empty value from swallowing the next header.
        $fields = array();
        if (preg_match_all('/
                ^ %?
                ([\w-]+): \h*
                (.+ (?:\R\h.+$)* )
                $
            /xm', (string)$data, $uu)
        ) {
            $fields = array_change_key_case(
                array_combine($uu[1], array_map("trim", $uu[2])),
                CASE_LOWER
            );
        }

        // Test if it's PKG-INFO (RPM specs only provide a Summary: header)
        $description = self::first($fields, array("description", "summary"));
        if (empty($description)) {
            return NULL;
        }

        // platform + keywords become one comma-separated tag list
        $tags = implode(", ", array_filter(array(
            self::first($fields, array("platform", "platforms")),
            self::first($fields, array("keywords")),
        )));
        $tags = trim(preg_replace('/[\s,;]+/', ", ", $tags), ", ");

        return array(
            "title" => self::first($fields, array("name", "title", "package")),
            "version" => self::first($fields, array("version")),
            "description" => $description,
            "tags" => $tags,
          # "trove-tags" => $data["classifiers"],
            "homepage" => self::first($fields, array("home-page", "url", "homepage", "primary-site")),
            "download" => self::first($fields, array("download-url")),
            "license" => tags::map_license(self::first($fields, array("license", "coding-policy", "copyright"))),
        );
    }



    /**
     * Import from DOAP description.
     *
     * Would actually require a RDF toolkit,
     * but for the simple use case here, it's just processed namespace-unaware as xml.
     *
     * @param  string $data  DOAP/RDF XML text.
     * @return array|NULL  Form fields, or NULL if the XML is invalid or has no Project element.
     */
    public function DOAP($data) {

        // Collect libxml parse errors internally instead of emitting warnings,
        // then restore whatever error mode was active before.
        $previous = libxml_use_internal_errors(TRUE);
        $xml = simplexml_load_string((string)$data, "SimpleXMLElement", LIBXML_NONET);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
        if ($xml === FALSE) {
            return NULL;
        }

        // <Project> is either wrapped in <rdf:RDF>, or the document root itself
        $x = ($xml->getName() === "Project") ? $xml : $xml->Project;
        if (!$x) {
            return NULL;
        }

        $tags = array_filter(
            array(
                strval($x->{'programming-language'}),
                strval($x->category["resource"]),
            ),
            "strlen"
        );
        return array(
            "name" => strval($x->shortname),
            "title" => strval($x->name),
            "description" => strval($x->description ?: $x->shortdesc),
            "homepage" => strval($x->homepage["resource"]),
            "download" => strval($x->{'download-page'}["resource"]),
            "tags" => implode(", ", $tags),
            "license" => tags::map_license(basename(strval($x->license["resource"]))),
            "version" => strval($x->release->Version->revision),
        );
    }



    /**
     * Freecodes JSON API is gone, so we have to extract from the project
     * page itself.
     *
     * @param  string $name  Freecode project name (URL slug).
     * @return array|NULL  Form fields, or NULL if the page could not be fetched or matched.
     */
    public function FREECODE($name) {

        $name = (string)$name;
        if ($name === "") {
            return NULL;
        }

        // retrieve (the name is request input, so keep it inside one path segment)
        $html = curl("http://freecode.com/projects/" . rawurlencode($name))->exec();
        if (!$html) {
            return NULL;
        }
        
        // regex extract to reduce false positives
        preg_match_all('~
              <meta \s+ property="og:title" \s+ content="(?<title>[^"]+)"
           |  <meta \s+ name="keywords" \s+ content="(?<tags>[^"]+)"
           |  class="project-detail">  \s+  <p>  (?<description>[^<>]+)</p>
           |  >Licenses< .+? rel="tag">  (?<license>[^<>]+)</a>
           |  >Implementation< .+? rel="tag">  (?<lang>[^<>]+)</a>
        ~smix', $html, $uu, PREG_SET_ORDER);
        if (empty($uu[0][0])) {
            return NULL;
        }

        // join fields: each match set only carries the group that matched,
        // so drop the empty groups and merge all sets into one record
        $uu = call_user_func_array("array_merge", array_map("array_filter", $uu));
        $uu += array("title" => NULL, "description" => NULL, "tags" => "", "license" => NULL, "lang" => "");

        return array(
            "name" => $name,
            "title" => $uu["title"],
            "description" => $uu["description"],
            "tags" => strtolower((!empty($uu["lang"]) ? "$uu[lang], " : "") . $uu["tags"]),
            "license" => tags::map_license($uu["license"]),
        );
    }



    /**
     * Sourceforge still provides a JSON export.
     *
     * @param  string $name  SourceForge project shortname.
     * @return array|NULL  Form fields, or NULL if no usable JSON was returned.
     */
    public function SOURCEFORGE($name) {

        $name = (string)$name;
        if ($name === "") {
            return NULL;
        }
        $slug = rawurlencode($name);   // request input; keep it inside one path segment

        // retrieve
        $data = json_decode((string)curl("https://sourceforge.net/rest/p/$slug")->exec(), TRUE);
        if (!$data or !is_array($data)) {
            return NULL;
        }

        // category lists may be absent, e.g. in error responses
        $cat = (isset($data["categories"]) and is_array($data["categories"])) ? $data["categories"] : array();

        // custom json extraction
        return array(
            "name" => self::first($data, array("shortname")),
            "title" => self::first($data, array("name")),
            "homepage" => self::first($data, array("external_homepage", "url")),
            "description" => self::first($data, array("short_description")),
            "image" => isset($data["screenshots"][0]["thumbnail_url"]) ? $data["screenshots"][0]["thumbnail_url"] : NULL,
            "license" => tags::map_license(isset($cat["license"][0]["fullname"]) ? $cat["license"][0]["fullname"] : NULL),
            "tags" => implode(", ",
                array_merge(
                    self::column($cat, "language", "shortname"),
                    self::column($cat, "environment", "fullname"),
                    self::column($cat, "topic", "shortname")
                )
            ),
            "state" => isset($cat["developmentstatus"][0]["shortname"]) ? $cat["developmentstatus"][0]["shortname"] : NULL,
            "urls" => "SourceForge = https://sourceforge.net/projects/$slug\nGitHub = \n",
        );
    }



    /**
     * Returns the first non-empty entry among $keys; if none is non-empty,
     * the entry of the last key if it is set at all, else NULL
     * (same result as an `a ?: b ?: c` chain, without undefined-index notices).
     */
    private static function first($data, array $keys) {
        if (!is_array($data)) {
            return NULL;
        }
        foreach ($keys as $key) {
            if (!empty($data[$key])) {
                return $data[$key];
            }
        }
        $last = end($keys);
        return isset($data[$last]) ? $data[$last] : NULL;
    }


    /**
     * Extracts one $field column from the row list $set[$key]; empty array if that list is missing.
     */
    private static function column(array $set, $key, $field) {
        return (isset($set[$key]) and is_array($set[$key])) ? array_column($set[$key], $field) : array();
    }


    /**
     * Builds newline-separated "label=url" lines from a list of {url, <label_key>} rows.
     * URLs are kept verbatim (not percent-encoded); a later row with the same label wins.
     */
    private static function url_lines($rows, $label_key) {
        $lines = array();
        if (is_array($rows)) {
            foreach ($rows as $index => $row) {
                if (is_array($row) and isset($row["url"]) and is_string($row["url"])) {
                    $label = (isset($row[$label_key]) and is_scalar($row[$label_key])) ? $row[$label_key] : $index;
                    $lines[$label] = "$label=" . $row["url"];
                }
                elseif (is_string($row) and is_string($index)) {
                    // plain label => url map
                    $lines[$index] = "$index=$row";
                }
            }
        }
        return implode("\n", $lines);
    }

}


#print_r((new project_import)->freecode("firefox"));



?>