PoshCode Archive  Artifact [1798846d80]

Artifact 1798846d80c3e5786114a931069bc43fffb710cbef88cb6f993eb2bd9e278d30:

  • File Parse-youtube-links.ps1 — part of check-in [1bc0519ea3] at 2018-06-10 14:04:03 on branch trunk — Function takes search term and number of pages as input parameters. Depending on number of pages given (user: Syslq size: 2628)

# encoding: ascii
# api: powershell
# title: Parse youtube links
# description: Function takes search term and number of pages as input parameters. Depending on number of pages given
# version: 0.1
# type: script
# author: Syslq
# license: CC0
# x-poshcode-id: 5918
# x-derived-from-id: 5922
# x-archived: 2015-10-06T20:51:50
# x-published: 2015-07-06T08:24:00
#
# the function returns all direct video URLs found on those pages for the given search term. If filePath
# is specified, the function writes the links to that file instead of outputting them to the screen.
# It is implemented via web page parsing, not via REST API, so script does not need OAuth Authorization
# Powershell v3 or higher is assumed as it relies on Invoke-Webrequest
#
<#
.SYNOPSIS
Gets direct youtube links for given search term

.SYNTAX
GetYoutubeLinks [-searchTerm <String>] [-numOfPages <Int>] [-filePath <String>]

.DESCRIPTION
Function takes search term and number of pages as input parameters. Depending on number of pages given
the function returns all direct video URLs found on those pages for the given search term. If filePath
is specified, the function writes the links to that file instead of outputting them to the screen.
It is implemented via web page parsing, not via REST API, so script does not need OAuth Authorization
Powershell v3 or higher is assumed as it relies on Invoke-Webrequest

.EXAMPLE
GetYoutubeLinks -searchTerm 'I like powershell' -numOfPages 3

.EXAMPLE
GetYoutubeLinks -searchTerm 'I like powershell' -numOfPages 7 -filePath "D:\ILikePowershell.txt"
#>

function GetYoutubeLinks ($searchTerm, $numOfPages = 1, [string]$filePath)
{
  # Scrapes YouTube search-result pages and collects direct video URLs.
  #
  # Parameters:
  #   searchTerm - free-text search phrase; it is URL-encoded here, so pass it raw.
  #   numOfPages - number of result pages to fetch; missing, $null or values below 1
  #                are treated as 1.
  #   filePath   - optional; when non-empty the links are written to this file
  #                (via Out-File) instead of being emitted to the pipeline.
  #
  # Output: the collected URL strings (only when filePath is empty).

  $youtube = 'https://www.youtube.com'
  $allLinks = New-Object System.Collections.ArrayList

  # Appends every plain video link found in a web response to $allLinks.
  # $allLinks and $youtube are resolved from the enclosing function's scope.
  function ParseLinks($web_links)
  {
    foreach($link in $web_links.Links)
    {
      # Keep "/watch?v=" links, skip those that carry a playlist argument
      # (the pattern is matched against the href exactly as the page delivers it).
      if(($link.href -like '*/watch?v=*') -and ($link.href -notlike '*;list=*'))
      {
        # ArrayList.Add returns the new index; discard it so it does not
        # leak into the function's output alongside the URLs.
        [void]$allLinks.Add($youtube + $link.href)
      }
    }
  }

  # Normalise the page count so that a missing/zero value still fetches one page.
  $pageCount = [int]$numOfPages
  if($pageCount -lt 1)
  {
    $pageCount = 1
  }

  # Escape the term so characters such as '&', '#' or '+' cannot break the query string.
  $searchUrl = $youtube + '/results?search_query=' + [uri]::EscapeDataString([string]$searchTerm)

  # Fetch exactly $pageCount pages. Numbering starts at 1
  # (assumes the site's "page" argument is 1-based - TODO confirm).
  for($i = 1; $i -le $pageCount; $i++)
  {
    $youTubePage = $searchUrl
    if($pageCount -gt 1)
    {
      # A single-page request uses the bare search URL, without a page argument.
      $youTubePage += "&page=$i"
    }

    $web = Invoke-WebRequest -Uri $youTubePage
    ParseLinks -web_links $web
  }

  if([string]::IsNullOrEmpty($filePath))
  {
    return $allLinks
  }

  $allLinks | Out-File -FilePath $filePath
}