# encoding: ascii
# api: powershell
# title: Get-WebVideoFile
# description: Download video-files from the specified RSS-feed URL, based on HTML scraping and a regular expression for finding the download URL.Joel Bennett`s Get-WebFile function from poshcode.org, which provides progress status during download, is used for downloading the files.
# version: 1.0
# type: script
# author: Jan Egil Ring
# license: CC0
# function: Get-WebFile
# x-poshcode-id: 3399
# x-archived: 2012-05-13T23:54:36
# x-published: 2012-05-06T04:37:00
#
#
<#
.SYNOPSIS
Download video-files from the specified RSS-feed URL, based on HTML scraping and a regular expression.
.DESCRIPTION
Download video-files from the specified RSS-feed URL, based on HTML scraping and a regular expression for finding the download URL.
Joel Bennett`s Get-WebFile function from poshcode.org, which provides progress status during download, is used for downloading the files.
The script was originally created for downloading wmv-files from Microsoft TechNet Edge (http://technet.microsoft.com/en-us/edge).
.PARAMETER RssUrl
The URL for the RSS feed to process
.PARAMETER destination
The destination-folder for the downloaded video files. If not specified, the downloaded files will be placed in the current user`s Video-folder ($home\Videos).
.PARAMETER UseOriginalFileName
Switch-parameter to specify usage of original filenames. If not specified the RSS title will be used as filename.
.PARAMETER UrlRegex
A regular expression used to search for video URL`s. If not specified a regular expression for finding wmv-files on TechNet Edge is used.
.EXAMPLE
.\Get-WebVideoFile.ps1 -RssUrl "http://technet.microsoft.com/en-us/edge/SyndicationGetTopics/cc543196.aspx?field=Category&value=System Center 2012&ancestor=ff524487&version=MSDN.10"
.EXAMPLE
.\Get-WebVideoFile.ps1 -Destination "C:\TechNet Edge Videos\" -RssUrl "http://technet.microsoft.com/en-us/edge/SyndicationGetTopics/cc543196.aspx?field=Category&value=System Center 2012&ancestor=ff524487&version=MSDN.10"
.EXAMPLE
.\Get-WebVideoFile.ps1 -UseOriginalFileName -RssUrl "http://technet.microsoft.com/en-us/edge/SyndicationGetTopics/cc543196.aspx?field=Category&value=System Center 2012&ancestor=ff524487&version=MSDN.10"
.EXAMPLE
.\Get-WebVideoFile.ps1 -Verbose -RssUrl "http://technet.microsoft.com/en-us/edge/SyndicationGetTopics/cc543196.aspx?field=Category&value=System Center 2012&ancestor=ff524487&version=MSDN.10"
.NOTES
Name: Get-WebVideoFile.ps1
Author: Jan Egil Ring
Website: http://blog.powershell.no
Usage:
1) Find and browse to the category you want to download files from. Available categories: http://technet.microsoft.com/en-us/edge/ff701756
2) Find the RSS URL by clicking the RSS-icon next to the category title on the top of the website
3) Specify the URL on the URL-parameter: .\Get-WebVideoFile.ps1 -RssUrl "http://technet.microsoft.com/en-us/edge/Syndication..."
You have a royalty-free right to use, modify, reproduce, and
distribute this script file in any way you find useful, provided that
you agree that the creator, owner above has no warranty, obligations,
or liability for such use.
VERSION HISTORY:
1.0 05.05.2012 - Initial release
#Requires -Version 2.0
#>
Param(
[Parameter(Mandatory=$true)]
[string]$RssUrl,
[string]$Destination = "$home\Videos\",
[switch]$UseOriginalFileName,
[regex]$UrlRegex = "(?<url>http://content\d.catalog.video.msn.com/../../[0-f]{8}-[0-f]{4}-[0-f]{4}-[0-f]{4}-[0-f]{12}(?<file>[^>]*?wmv))"
)
# Get-WebFile function from http://poshcode.org/3219
function Get-WebFile {
param(
$url = (Read-Host "The URL to download"),
$fileName = $null,
[switch]$Passthru,
[switch]$quiet
)
if($url.contains("http"))
{
$req = [System.Net.HttpWebRequest]::Create($url);
}
else
{
$URL_Format_Error = [string]"Connection protocol not specified. Recommended action: Try again using protocol (for example 'http://" + $url + "') instead. Function aborting...";
Write-Error $URL_Format_Error;
return;
}
$req.CookieContainer = New-Object System.Net.CookieContainer
try{
$res = $req.GetResponse();
}
catch
{
Write-Error $error[0].Exception.InnerException.Message;
return;
}
if($fileName -and !(Split-Path $fileName)) {
$fileName = Join-Path (Get-Location -PSProvider "FileSystem") $fileName
}
elseif((!$Passthru -and ($fileName -eq $null)) -or (($fileName -ne $null) -and (Test-Path -PathType "Container" $fileName)))
{
[string]$fileName = ([regex]'(?i)filename=(.*)$').Match( $res.Headers["Content-Disposition"] ).Groups[1].Value
$fileName = $fileName.trim("\/""'")
if(!$fileName) {
$fileName = $res.ResponseUri.Segments[-1]
$fileName = $fileName.trim("\/")
if(!$fileName) {
$fileName = Read-Host "Please provide a file name"
}
$fileName = $fileName.trim("\/")
if(!([IO.FileInfo]$fileName).Extension) {
$fileName = $fileName + "." + $res.ContentType.Split(";")[0].Split("/")[1]
}
}
$fileName = Join-Path (Get-Location -PSProvider "FileSystem") $fileName
}
if($Passthru) {
$encoding = [System.Text.Encoding]::GetEncoding( $res.CharacterSet )
[string]$output = ""
}
if($res.StatusCode -eq 200) {
[long]$goal = $res.ContentLength
$reader = $res.GetResponseStream()
if($fileName) {
try{
$writer = new-object System.IO.FileStream $fileName, "Create"
}
catch{
Write-Error $error[0].Exception.InnerException.Message;
return;
}
}
[byte[]]$buffer = new-object byte[] 4096
[long]$total = [long]$count = 0
do
{
$count = $reader.Read($buffer, 0, $buffer.Length);
if($fileName) {
$writer.Write($buffer, 0, $count);
}
if($Passthru){
$output += $encoding.GetString($buffer,0,$count)
} elseif(!$quiet) {
$total += $count
if($goal -gt 0) {
Write-Progress "Downloading $url" "Saving $total of $goal" -id 0 -percentComplete (($total/$goal)*100)
} else {
Write-Progress "Downloading $url" "Saving $total bytes..." -id 0
}
}
} while ($count -gt 0)
$reader.Close()
if($fileName) {
$writer.Flush()
$writer.Close()
}
if($Passthru){
$output
}
}
$res.Close();
if($fileName) {
ls $fileName
}
}
$wc = New-Object net.webclient
[xml]$xml = $wc.DownloadString($rssurl)
$itemcount = $xml.rss.channel.item.count
$count = 0
$xml.rss.channel.item | foreach {
$count ++
Write-Verbose "Processing RSS item $count of $itemcount : $($_.title)"
$string = $wc.DownloadString($_.link)
if ($string -match $urlregex) {
Write-Verbose "URL regex matched"
$url = $matches.url
}
else {
Write-Verbose "URL regex did not match"
return
}
if ($UseOriginalFileName) {
$file = $url.split("/")[-1]
}
else {
$file = $_.Title
# Remove illegal filename characters / ? * : ; { } \ |
foreach ($character in ('/','?','*',':',';','{','}','\','|')) {
$file = $file.Replace($character,'')
}
$file = $file + '.' + $url.split(".")[-1]
}
if ($url) {
$filepath = "$destination$file"
if (Test-Path $filepath)
{Write-Verbose "$file is already present"}
else {
Write-Verbose "Downloading $file"
Get-WebFile -url $url -filename $filepath
}
}
}