This is a small script to help people get the URL for the Full Matches And Shows content without the advertisements. It figures out the URL of the video on the Content Delivery Network.
You will need to install a few items before you can use it. They include:
- Python 3
- Requests python library: install Python 3 and then run 'pip3 install requests' in a shell
- BeautifulSoup4 python library: install Python 3 and then run 'pip3 install beautifulsoup4' in a shell
Save the contents below to a file and then run it from a shell.
#!/usr/bin/python3
"""
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import requests
from bs4 import BeautifulSoup
from requests import RequestException
import sys
def get_cdn_url(fmas_url, timeout=30):
    """Return the advertisement-free CDN video URL for a page.

    Downloads *fmas_url*, finds the first ``<script>`` tag that carries a
    ``data-config`` attribute, and uses the first two numeric path segments
    of that attribute's value to build the Playwire CDN URL of the SD video.

    Args:
        fmas_url: URL of the page to scrape.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``https://cdn.video.playwire.com/<id>/videos/<id>/video-sd.mp4``
        URL string.

    On any failure (request error, missing ``data-config``, or anything
    unexpected) an explanatory message is printed and the process exits
    with status 1 via ``sys.exit``.
    """
    try:
        # Without a timeout an unresponsive host would hang the script forever.
        response = requests.get(fmas_url, timeout=timeout)
        # Report HTTP errors (404, 500, ...) as URL problems instead of
        # trying to scrape the error page.
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        # Find the all-important tags carrying a 'data-config' attribute.
        playwire = soup.find_all('script', {'data-config': True})
        if not playwire:
            raise DataConfigNotFoundException(
                'no <script> tag with a data-config attribute')

        # Read the attribute value directly rather than stringifying the
        # whole tag and splitting on spaces.
        data_config = str(playwire[0]['data-config'])
        numbers = [part for part in data_config.split('/') if part.isnumeric()]
        if len(numbers) < 2:
            # Two numeric path segments are needed to build the CDN URL.
            raise DataConfigNotFoundException(
                'unexpected data-config value: {}'.format(data_config))

        # The content distribution network URL for the episode
        # (sans advertisements).
        return 'https://cdn.video.playwire.com/{}/videos/{}/video-sd.mp4'.format(
            numbers[0], numbers[1])
    except RequestException as e:
        print("There was an error with the URL you provided. "
              "Please make sure it's from Full Matches And Shows.\n"
              "e.g. http://www.fullmatchesandshows.com/2017/01/01/"
              "arsenal-vs-crystal-palace-highlights-full-match-2/")
        print(e)
        sys.exit(1)
    except DataConfigNotFoundException as e:
        print('The data-config element was not found. This probably means an unsupported page is being scraped.')
        print(e)
        sys.exit(1)
    except Exception as e:
        # Last-resort boundary for a command-line script: report and exit.
        print('Non-descript exception occurred')
        print(e)
        sys.exit(1)
class DataConfigNotFoundException(Exception):
    """Raised when the data-config element cannot be found on a page.

    Its absence means the page is not the right type of page to scrape.
    Derives from ``Exception`` (not ``BaseException``), which is the
    recommended base for user-defined exceptions; ``BaseException`` is
    meant for the interpreter's own exit signals.
    """
if __name__ == '__main__':
    # Prompt for the page URL, trim surrounding whitespace, then print the
    # resolved CDN URL.
    print(get_cdn_url(input('Please enter a url:').strip()))
Enjoy!