Python code

Discussion about Schedules Direct grabber code and data formats.

Python code

Postby pallaire » Mon Mar 17, 2008 7:27 am

Hello

Does anybody have sample Python code for downloading the schedule? Ideally something that doesn't require installing a bunch of plugins.

thanks
pallaire
 
Posts: 1
Joined: Mon Mar 17, 2008 7:25 am

Re: Python code

Postby kmedcalf » Sat Sep 13, 2008 7:20 am

Simple Retrieval of XML data with Python.

Usage with gzipped=True may or may not work. Retrieval with the defaults works just peachy and results in a latin-1 encoded XML document stored in the file specified.

Simply provide a username and password and the data will be retrieved and stored to disk.

If you break it, you own both halves.

Code: Select all
# This module retrieves SchedulesDirect XML data using a hand-coded SOAP request.
#
# The code is released into the Public Domain.  If you break it, you own both halves.
#
# Original Code by Keith Medcalf, kmedcalf@dessus.com

import codecs
import encodings
import gzip
import string
import sys
import time
import urllib2
import urlparse

def FetchXML(userName,
             passWord,
             URL='http://webservices.schedulesdirect.tmsdatadirect.com/schedulesdirect/tvlistings/xtvdService',
             Realm='TMSWebServiceRealm',
             predays=0,
             postdays=14,
             fileName='ddata.xml',
             fileCoding='latin-1',
             gzipped = False):
    cur = time.time()
    startTime = time.strftime( '%Y-%m-%dT00:00:00Z', time.gmtime( cur +  ( predays * 86400.0 ) ) )
    endTime   = time.strftime( '%Y-%m-%dT00:00:00Z', time.gmtime( cur + ( postdays * 86400.0 ) ) )
    strSoap = '<SOAP-ENV:Envelope xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/" xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">\n' \
              ' <SOAP-ENV:Body>\n' \
              '  <m:download xmlns:m="urn:TMSWebServices" SOAP-ENV:encodingStyle="http://schemas.xmlsoap.org/soap/encoding/">\n' \
              '   <startTime xsi:type="xsd:dateTime">' + startTime + '</startTime>\n' \
              '   <endTime xsi:type="xsd:dateTime">' + endTime + '</endTime>\n' \
              '  </m:download>\n' \
              ' </SOAP-ENV:Body>\n' \
              '</SOAP-ENV:Envelope>'
    print '#', time.strftime('%Y/%m/%d %H:%M:%S'), "Retrieving DataDirect TV Schedules"
    print '#', time.strftime('%Y/%m/%d %H:%M:%S'), "Requesting", startTime, "to", endTime
    authinfo = urllib2.HTTPDigestAuthHandler()
    authinfo.add_password(Realm, urlparse.urlparse(URL)[1], userName, passWord)
    request = urllib2.Request(URL, strSoap)
    if gzipped:
        request.add_header('Accept-encoding', 'gzip')
        if fileName[-3:].lower() == '.gz':
            fileName = fileName[:-3]
        fileName += '.gz'
    opener = urllib2.build_opener(authinfo)
    urllib2.install_opener(opener)
    print '#', time.strftime('%Y/%m/%d %H:%M:%S'), 'Saving XML to File: ' + fileName + ', Encoding: ' + fileCoding
    fileObj = None
    if fileCoding == 'native':
        urldata = opener.open(request)
        outfile = open(fileName,'wb',262144)
        repenc = False
    elif not gzipped:
        urldata = codecs.getreader('utf-8')(opener.open(request), errors='replace')
        outfile = codecs.open(fileName,'wb', fileCoding, 'replace', 262144)
        repenc = True
    else:
        raise ValueError('Codepage Translation of GZIP data not supported')
    print '#', time.strftime('%Y/%m/%d %H:%M:%S'), 'Receiving XML Data', ' '*30,
    fmt = ('\b'*30) + '%6d KB, %3d KB/s, %3d KB/s'
    data = 'X'
    bytes = 0
    currb = 0
    first = time.time()
    last = time.time() - 1
    while data:
        data = urldata.read(8192)
        b = len(data)
        bytes += b
        currb += b
        if repenc:
            data = string.replace(data, "encoding='utf-8'", "encoding='"+fileCoding+"'")
            repenc = False
        if data:
            outfile.write(data)
        curr = time.time()
        diff = curr - last
        if diff >= 0.999:
            print fmt % ((bytes//1024), currb//1024//(curr-last), bytes//1024//(curr-first)),
            last = curr
            currb = 0
    urldata.close()
    outfile.close()
    if fileObj:
        fileObj.close()
    print fmt % ((bytes//1024), 0, bytes//1024//(curr-first))
    print '#', time.strftime('%Y/%m/%d %H:%M:%S'), "Data Retrieval Complete"

if __name__ == '__main__':
    # Placeholder credentials -- substitute real account details before running.
    userName, password = "<username>", "<password>"
    FetchXML(userName, password)

kmedcalf
 
Posts: 7
Joined: Sat Sep 13, 2008 7:01 am

Re: Python code

Postby dave256 » Wed Oct 22, 2014 7:01 am

Any chance you (or someone else) could post an updated version that works on Yosemite (Mac OS X 10.10) and with the necessary changes for the new URLs?

Thanks,
Dave
dave256
 
Posts: 3
Joined: Thu Jul 15, 2010 10:08 am

Re: Python code

Postby kmedcalf » Wed Nov 05, 2014 5:36 pm

Here is updated code with the passwords removed -- it is currently working for me against the new servers.

There are two changes:
1) the URL is changed to point to the new server
2) the HTTP Authorization Handler is changed from HTTPDigestAuthHandler() to HTTPBasicAuthHandler()

The new server does not presently support Digest authentication -- though it may again in the future.

Code: Select all
# This module retrieves SchedulesDirect XML data using a hand-coded SOAP request.
#
# The code is released into the Public Domain.  If you break it, you own both halves.
#
# Original Code by Keith Medcalf, kmedcalf@dessus.com

import codecs
import encodings
import gzip
import math
import string
import sys
import time
import urllib2
import urlparse

def FetchXML(userName,
             passWord,
#             URL='http://webservices.schedulesdirect.tmsdatadirect.com/schedulesdirect/tvlistings/xtvdService',
             URL='http://dd.schedulesdirect.org/schedulesdirect/tvlistings/xtvdService',
             Realm='TMSWebServiceRealm',
             predays=0,
             postdays=14,
             fileName='ddata.xml',
             fileCoding='latin-1',
             gzipped = False):
    cur = time.time()
    startTime = time.strftime('%Y-%m-%dT00:00:00Z', time.gmtime(cur +  (predays * 86400.0)))
    endTime   = time.strftime('%Y-%m-%dT00:00:00Z', time.gmtime(cur + (postdays * 86400.0)))
    strSoap = '<SOAP-ENV:Envelope xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/" xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">\n' \
              ' <SOAP-ENV:Body>\n' \
              '  <m:download xmlns:m="urn:TMSWebServices" SOAP-ENV:encodingStyle="http://schemas.xmlsoap.org/soap/encoding/">\n' \
              '   <startTime xsi:type="xsd:dateTime">' + startTime + '</startTime>\n' \
              '   <endTime xsi:type="xsd:dateTime">' + endTime + '</endTime>\n' \
              '  </m:download>\n' \
              ' </SOAP-ENV:Body>\n' \
              '</SOAP-ENV:Envelope>'
    print '#', time.strftime('%Y/%m/%d %H:%M:%S'), "Retrieving DataDirect TV Schedules"
    print '#', time.strftime('%Y/%m/%d %H:%M:%S'), "Requesting", startTime, "to", endTime
#    authinfo = urllib2.HTTPDigestAuthHandler()
    authinfo = urllib2.HTTPBasicAuthHandler()
    authinfo.add_password(Realm, urlparse.urlparse(URL)[1], userName, passWord)
    request = urllib2.Request(URL, strSoap)
    if gzipped:
        request.add_header('Accept-encoding', 'gzip')
        if fileName[-3:].lower() == '.gz':
            fileName = fileName[:-3]
        fileName += '.gz'
    opener = urllib2.build_opener(authinfo)
    urllib2.install_opener(opener)
    print '#', time.strftime('%Y/%m/%d %H:%M:%S'), 'Saving XML to File: ' + fileName + ', Encoding: ' + fileCoding
    fileObj = None
    if fileCoding == 'native':
        urldata = opener.open(request)
        outfile = open(fileName,'wb',262144)
        repenc = False
    elif not gzipped:
        urldata = codecs.getreader('utf-8')(opener.open(request), errors='replace')
        outfile = codecs.open(fileName,'wb', fileCoding, 'replace', 262144)
        repenc = True
    else:
        raise ValueError('Codepage Translation of GZIP data not supported')

    print '#', time.strftime('%Y/%m/%d %H:%M:%S'), 'Receiving XML Data', ' '*44,
    fmt = ('\b'*44) + '%6d KB, %4d KB/s, %4d KB/s (RS %3d KB)'
    data = 'X'
    bytes = 0
    currb = 0
    first = time.time()
    last = first
    size = 8192
    time.sleep(1)
    while data:
        data = urldata.read(size)
        b = len(data)
        bytes += b
        currb += b
        if repenc:
            data = string.replace(data, "encoding='utf-8'", "encoding='"+fileCoding+"'")
            repenc = False
        if data:
            outfile.write(data)
        curr = time.time()
        diff = curr - last
        if diff >= 0.999:
            print fmt % ((bytes//1024), currb//1024//(curr-last), bytes//1024//(curr-first), size // 1024),
            size =max(8192, 2 ** int(math.log(bytes / (curr - first) / 5, 2)))
            last = curr
            currb = 0
    urldata.close()
    outfile.close()
    if fileObj:
        fileObj.close()
    print fmt % ((bytes//1024), 0, bytes//1024//(curr-first), size // 1024)
    print '#', time.strftime('%Y/%m/%d %H:%M:%S'), "Data Retrieval Complete"

if __name__ == '__main__':
    # Credentials were blanked out for posting -- fill in real ones before running.
    userName, password = "xxxxxxxx", "xxxxxxxx"
    # 'native' stores the server's response without re-encoding it.
    FetchXML(userName, password, fileCoding='native')
kmedcalf
 
Posts: 7
Joined: Sat Sep 13, 2008 7:01 am

Re: Python code

Postby emomo » Fri Nov 07, 2014 3:45 pm

kmedcalf wrote:Here is updated code with the passwords removed -- it is currently working for me against the new servers.

There are two changes:
1) the URL is changed to point to the new server
2) the HTTP Authorization Handler is changed from HTTPDigestAuthHandler() to HTTPBasicAuthHandler()

The new server does not presently support Digest authentication -- though it may again in the future.

Code: Select all
# This module retrieves SchedulesDirect XML data using a hand-coded SOAP request.
#
# The code is released into the Public Domain.  If you break it, you own both halves.
#
# Original Code by Keith Medcalf, kmedcalf@dessus.com

import codecs
import encodings
import gzip
import math
import string
import sys
import time
import urllib2
import urlparse

def FetchXML(userName,
             passWord,
#             URL='http://webservices.schedulesdirect.tmsdatadirect.com/schedulesdirect/tvlistings/xtvdService',
             URL='http://dd.schedulesdirect.org/schedulesdirect/tvlistings/xtvdService',
             Realm='TMSWebServiceRealm',
             predays=0,
             postdays=14,
             fileName='ddata.xml',
             fileCoding='latin-1',
             gzipped = False):
    cur = time.time()
    startTime = time.strftime('%Y-%m-%dT00:00:00Z', time.gmtime(cur +  (predays * 86400.0)))
    endTime   = time.strftime('%Y-%m-%dT00:00:00Z', time.gmtime(cur + (postdays * 86400.0)))
    strSoap = '<SOAP-ENV:Envelope xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/" xmlns:SOAP-ENC="http://schemas.xmlsoap.org/soap/encoding/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">\n' \
              ' <SOAP-ENV:Body>\n' \
              '  <m:download xmlns:m="urn:TMSWebServices" SOAP-ENV:encodingStyle="http://schemas.xmlsoap.org/soap/encoding/">\n' \
              '   <startTime xsi:type="xsd:dateTime">' + startTime + '</startTime>\n' \
              '   <endTime xsi:type="xsd:dateTime">' + endTime + '</endTime>\n' \
              '  </m:download>\n' \
              ' </SOAP-ENV:Body>\n' \
              '</SOAP-ENV:Envelope>'
    print '#', time.strftime('%Y/%m/%d %H:%M:%S'), "Retrieving DataDirect TV Schedules"
    print '#', time.strftime('%Y/%m/%d %H:%M:%S'), "Requesting", startTime, "to", endTime
#    authinfo = urllib2.HTTPDigestAuthHandler()
    authinfo = urllib2.HTTPBasicAuthHandler()
    authinfo.add_password(Realm, urlparse.urlparse(URL)[1], userName, passWord)
    request = urllib2.Request(URL, strSoap)
    if gzipped:
        request.add_header('Accept-encoding', 'gzip')
        if fileName[-3:].lower() == '.gz':
            fileName = fileName[:-3]
        fileName += '.gz'
    opener = urllib2.build_opener(authinfo)
    urllib2.install_opener(opener)
    print '#', time.strftime('%Y/%m/%d %H:%M:%S'), 'Saving XML to File: ' + fileName + ', Encoding: ' + fileCoding
    fileObj = None
    if fileCoding == 'native':
        urldata = opener.open(request)
        outfile = open(fileName,'wb',262144)
        repenc = False
    elif not gzipped:
        urldata = codecs.getreader('utf-8')(opener.open(request), errors='replace')
        outfile = codecs.open(fileName,'wb', fileCoding, 'replace', 262144)
        repenc = True
    else:
        raise ValueError('Codepage Translation of GZIP data not supported')

    print '#', time.strftime('%Y/%m/%d %H:%M:%S'), 'Receiving XML Data', ' '*44,
    fmt = ('\b'*44) + '%6d KB, %4d KB/s, %4d KB/s (RS %3d KB)'
    data = 'X'
    bytes = 0
    currb = 0
    first = time.time()
    last = first
    size = 8192
    time.sleep(1)
    while data:
        data = urldata.read(size)
        b = len(data)
        bytes += b
        currb += b
        if repenc:
            data = string.replace(data, "encoding='utf-8'", "encoding='"+fileCoding+"'")
            repenc = False
        if data:
            outfile.write(data)
        curr = time.time()
        diff = curr - last
        if diff >= 0.999:
            print fmt % ((bytes//1024), currb//1024//(curr-last), bytes//1024//(curr-first), size // 1024),
            size =max(8192, 2 ** int(math.log(bytes / (curr - first) / 5, 2)))
            last = curr
            currb = 0
    urldata.close()
    outfile.close()
    if fileObj:
        fileObj.close()
    print fmt % ((bytes//1024), 0, bytes//1024//(curr-first), size // 1024)
    print '#', time.strftime('%Y/%m/%d %H:%M:%S'), "Data Retrieval Complete"

if __name__ == '__main__':
    # Credentials were blanked out for posting -- fill in real ones before running.
    userName, password = "xxxxxxxx", "xxxxxxxx"
    # 'native' stores the server's response without re-encoding it.
    FetchXML(userName, password, fileCoding='native')


I tried to use the code above today, but got a "Connection refused" error:
Code: Select all
 python test_sd.py
# 2014/11/07 13:13:10 Retrieving DataDirect TV Schedules
# 2014/11/07 13:13:10 Requesting 2014-11-07T00:00:00Z to 2014-11-21T00:00:00Z
# 2014/11/07 13:13:10 Saving XML to File: ddata.xml, Encoding: native
Traceback (most recent call last):
  File "test_sd.py", line 100, in <module>
    FetchXML(userName, password, fileCoding='native')
  File "test_sd.py", line 54, in FetchXML
    urldata = opener.open(request)
  File "/usr/local/Cellar/python/2.7.6_1/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 404, in open
    response = self._open(req, data)
  File "/usr/local/Cellar/python/2.7.6_1/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 422, in _open
    '_open', req)
  File "/usr/local/Cellar/python/2.7.6_1/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 382, in _call_chain
    result = func(*args)
  File "/usr/local/Cellar/python/2.7.6_1/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 1214, in http_open
    return self.do_open(httplib.HTTPConnection, req)
  File "/usr/local/Cellar/python/2.7.6_1/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 1184, in do_open
    raise URLError(err)
urllib2.URLError: <urlopen error [Errno 61] Connection refused>
(slipper)➜  slipper git:(sdfix) ✗ python test_sd.py
# 2014/11/07 13:13:13 Retrieving DataDirect TV Schedules
# 2014/11/07 13:13:13 Requesting 2014-11-07T00:00:00Z to 2014-11-21T00:00:00Z
# 2014/11/07 13:13:13 Saving XML to File: ddata.xml, Encoding: native
Traceback (most recent call last):
  File "test_sd.py", line 100, in <module>
    FetchXML(userName, password, fileCoding='native')
  File "test_sd.py", line 54, in FetchXML
    urldata = opener.open(request)
  File "/usr/local/Cellar/python/2.7.6_1/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 404, in open
    response = self._open(req, data)
  File "/usr/local/Cellar/python/2.7.6_1/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 422, in _open
    '_open', req)
  File "/usr/local/Cellar/python/2.7.6_1/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 382, in _call_chain
    result = func(*args)
  File "/usr/local/Cellar/python/2.7.6_1/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 1214, in http_open
    return self.do_open(httplib.HTTPConnection, req)
  File "/usr/local/Cellar/python/2.7.6_1/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 1184, in do_open
    raise URLError(err)
urllib2.URLError: <urlopen error [Errno 61] Connection refused>
(slipper)➜  slipper git:(sdfix) ✗ python test_sd.py
# 2014/11/07 13:13:15 Retrieving DataDirect TV Schedules
# 2014/11/07 13:13:15 Requesting 2014-11-07T00:00:00Z to 2014-11-21T00:00:00Z
# 2014/11/07 13:13:15 Saving XML to File: ddata.xml, Encoding: native
Traceback (most recent call last):
  File "test_sd.py", line 100, in <module>
    FetchXML(userName, password, fileCoding='native')
  File "test_sd.py", line 54, in FetchXML
    urldata = opener.open(request)
  File "/usr/local/Cellar/python/2.7.6_1/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 404, in open
    response = self._open(req, data)
  File "/usr/local/Cellar/python/2.7.6_1/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 422, in _open
    '_open', req)
  File "/usr/local/Cellar/python/2.7.6_1/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 382, in _call_chain
    result = func(*args)
  File "/usr/local/Cellar/python/2.7.6_1/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 1214, in http_open
    return self.do_open(httplib.HTTPConnection, req)
  File "/usr/local/Cellar/python/2.7.6_1/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib2.py", line 1184, in do_open
    raise URLError(err)
urllib2.URLError: <urlopen error [Errno 61] Connection refused>


Any ideas?
emomo
 
Posts: 1
Joined: Fri Nov 07, 2014 2:18 pm


Return to Developers Corner

Who is online

Users browsing this forum: No registered users and 2 guests