# -*- coding: utf-8 -*-
# Elisa - Home multimedia server
# Copyright (C) 2006-2008 Fluendo Embedded S.L. (www.fluendo.com).
# All rights reserved.
#
# This file is available under one of two license agreements.
#
# This file is licensed under the GPL version 3.
# See "LICENSE.GPL" in the root of this distribution including a special
# exception to use Elisa with Fluendo's plugins.
#
# The GPL part of Elisa is also available under a commercial licensing
# agreement from Fluendo.
# See "LICENSE.Elisa" in the root directory of this distribution package
# for details on that license.

"""
StageMedia component class
"""

__maintainer__ = 'Florian Boucault <florian@fluendo.com>'
__maintainer2__ = 'Benjamin Kampmann <benjamin@fluendo.com>'

from elisa.base_components.media_provider import MediaProvider, UriNotMonitorable
from elisa.core.media_uri import MediaUri, unquote
from elisa.core.utils import deferred_action
from elisa.core import common

from elisa.extern.configobj import Section

from elisa.core.observers.dict import DictObservable

from twisted.internet import defer, threads
from twisted.internet import reactor
from twisted.web import client
#from twisted.web.client import HTTPDownloader

from BeautifulSoup import BeautifulSoup

import urllib2, urllib, cookielib
import re
from mutex import mutex

from elisa.extern.translation import gettexter, N_
# Translator callable used for the user-visible labels of this plugin;
# 'elisa-stage6' is presumably the translation domain -- confirm against
# elisa.extern.translation.
T_ = gettexter('elisa-stage6')


# The message class is looked up through the plugin registry when this
# module is imported, so common.application must already be set up by then.
plugin_registry = common.application.plugin_registry
InternetLocationMessage = plugin_registry.get_component_class('base:internet_location_message')

class StageParser:

    """
    Parser that extracts tags, page counts, watchlist entries and videos
    from the HTML code of a Stage6 page.

    The elements of interest are located with BeautifulSoup; the regular
    expressions below are then applied to the string form of these elements.
    """

    # Regular expressions applied to the string form of HTML elements.
    # All groups are greedy "(.*)", so on a line holding several matching
    # markers the group extends up to the last one.

    # group 1: value of an href attribute
    reg_href = re.compile("href=\"(.*)\"")
    # group 1: href of the link that directly wraps an <acronym> element
    reg_href_avatar = re.compile("href=\"(.*)\"><acronym")
    # group 1: alt text, group 2: src of an image
    reg_img = re.compile("alt=\"(.*)\" src=\"(.*)\"")
    # group 2: text following an <img .../></acronym> pair; get_videos()
    # uses it as the length of the video
    reg_time = re.compile("<img (.*)/></acronym>(.*)")
    # group 1: src of an image whose src attribute is followed by alt
    reg_img_avatar = re.compile("src=\"(.*)\" alt=")
    # group 1: title of an <acronym class="no-border"> element
    reg_img_title = re.compile("<acronym title=\"(.*)\" class=\"no-border\">")
    # group 1: title attribute, group 2: text of a link
    reg_title = re.compile("title=\"(.*)\">(.*)</a></")
    # group 1: what lies between "video/" and a following slash
    reg_video_id = re.compile("video/(.*)/")
    # group 1: text of a link
    reg_pages = re.compile(">(.*)</a>")
    # group 1: id of a <div class="user-watch"> element
    reg_watch_type = re.compile("<div class=\"user-watch\" id=\"(.*)\">")

    def __init__(self, string_to_parse):
        """
        @param string_to_parse:         the HTML code to parse
        @type string_to_parse:          string
        """

        self._to_parse = string_to_parse


    def get_tags(self):
        """
        Returns a list of tags as strings. This is parsing the
        HTML data we have in self._to_parse

        @rtype: list of strings
        """

        tags = []
        # In case the Stage6 website is having two many connections
        could_not_connect = '<!-- Could not connect to MySQL host: Too many connections -->'
        if self._to_parse.startswith(could_not_connect):
            tags.append("Could not connect")
            return tags

        # BeautifulSoup is going to help us find the code we're looking for
        b = BeautifulSoup(self._to_parse)
        res = b.findAll('ul', attrs={'class': 'tags-drill'})

        if len(res):
            # Tag names are between <li> marks
            res = res[0].findAllNext('li')
            for tag in res:
                t = tag.contents[0]
                for i in t.attrs:
                    if i[0] == 'class':
                        title = t.attrs[0][1]
                        tags.append(title)
                        break

        return tags

    def get_pages(self):
        """
        Return the number of the last page linked from the pagination bar.

        @return: the number of the last page, or 0 when the page has no
                 pagination links or the last link does not hold a number
        @rtype:  int
        """
        soup = BeautifulSoup(self._to_parse)

        links = soup.findAll('a', attrs={'class' : 'pagination-number'})
        if not links:
            return 0

        match = self.reg_pages.search(str(links[-1]))
        if match is None:
            return 0

        # The text of the link is a string, while the result is compared
        # with integers by the caller: convert it here.
        try:
            return int(match.group(1).strip())
        except ValueError:
            return 0

    def get_watchlist(self):
        """
        Return the entries of the watchlist page held in self._to_parse.

        Every entry is a dictionary which looks like this:
        {'label' : '', 'href' : '', 'img': ''}

        Entries whose markup lacks one of the expected parts are skipped
        instead of aborting the whole listing.

        @rtype: list of dictionaries
        """

        soup = BeautifulSoup(self._to_parse)
        entries = []

        for div in soup.findAll('div', attrs={'class' : 'user-watch'}):
            avatar = str(div.find('div', attrs={'class': 'avatar'}))

            kind = self.reg_watch_type.search(str(div))
            link = self.reg_href_avatar.search(avatar)
            image = self.reg_img_avatar.search(avatar)
            title = self.reg_img_title.search(avatar)
            if not (kind and link and image and title):
                continue

            # Entries whose id starts with 'user' get the user videos path,
            # the others the plain videos path
            if kind.group(1).startswith('user'):
                href = "%s/videos/group:uservideos" % link.group(1)
            else:
                href = "%s/videos/" % link.group(1)

            entries.append({'label' : unquote(title.group(1)),
                            'href' : href,
                            'img' : image.group(1)})

        return entries

    def get_videos(self):
        """
        Return the videos listed in the HTML data held in self._to_parse.

        Every video is a tuple (video id, title, thumbnail, length). The
        thumbnail is '' when none was found and the length is None when the
        overlay of the video does not hold it. Videos without a title, a
        link or a video id are skipped.

        @rtype : list of (string, string, string, string or None)
        """

        videos = []

        # Every video sits in a <div> with class='video'
        soup = BeautifulSoup(self._to_parse)

        for block in soup.findAll('div', attrs={'class': 'video'}):
            title_div = block.find('div', attrs={'class' : 'video-title'})
            if not title_div:
                continue
            line = str(title_div)

            # the href holds the video id
            href_match = self.reg_href.search(line)
            if not href_match:
                continue
            id_match = self.reg_video_id.search(href_match.group(1))
            title_match = self.reg_title.search(line)
            if not (id_match and title_match):
                continue

            # the thumbnail is the last image of the block; a block
            # without any image simply gets no thumbnail
            thumbnail = ''
            images = block.findAll('img')
            if images:
                img_match = self.reg_img.search(str(images[-1]))
                if img_match:
                    thumbnail = img_match.group(2)

            # the length of the video is shown in the overlay
            overlay = str(block.find('div', attrs={'class' : 'video-overlay'}))
            time_match = self.reg_time.search(overlay)
            if time_match:
                length = time_match.group(2)
            else:
                length = None

            videos.append((id_match.group(1), title_match.group(2),
                           thumbnail, length))

        return videos


"""
class Downloader(HTTPDownloader):

    def pageEnd(self):

        print "got file"
"""


class StageMedia(MediaProvider):
    """
    Media provider for the 'stage6' URI scheme: it offers the videos of
    the stage6 video website (http://stage6.divx.com) as a browsable tree
    of directories and videos.
    """

    # Base URL of the website; the path of a stage6 URI is appended to it
    MAIN_URL = "http://stage6.divx.com/"
    # URL where we are going to look for video tags
    TAGS_URL = MAIN_URL + "videos/"
    # Prefix of the URIs of the videos. Every URI which does not start
    # with it is considered a directory; get_real_uri() swaps the scheme
    # for 'http' to get a playable URI.
    VIDEOS_URL = "stage6://video.stage6.com/"

    # Description of the configuration options
    config_doc = {'email' : 'The email which is registered at stage6, optional',
                  'password' : 'the password',
                  'pages' : "Show the 'other pages'-item, experimental. See"
                            " CONFIGURATION for more informations",
                  'certain_uris': "This is a list of other Stage6-Uris, see"
                                  " CONFIGURATION in the plugins-directory for"
                                  " more informations.",
                  'max_pages' : "The maximum number of other pages to show"
                  }

    # Default values of the configuration options. An empty email or
    # password means that no login is attempted.
    default_config = {'email' : '',
                      'password' : '',
                      'pages' : 0,
                      'max_pages' : 99,
                      'certain_uris': []
                      }
    

    def __init__(self):
        """
        Initialize the base class, the cache of directory listings, the
        queue of deferred actions and the cookie handling used by the login.
        """

        MediaProvider.__init__(self)

        # State and settings; the settings are read from the configuration
        # in initialize()
        self._loggedIn = False
        self._more_pages = 0
        self._max_pages = 99
        self._certain_uris = {}

        # Cache of the retrieved listings and the mutex object used around
        # the accesses to it.
        # NOTE(review): the stdlib mutex class does not block in
        # testandset() -- confirm it gives the intended protection.
        self._cache = {}
        self._mutex = mutex()

        # The DeferredActionsManager manages a queue of deferred actions,
        # in order to have only one request at a time to the website
        self._def_action = deferred_action.DeferredActionsManager()

        # Cookies are kept in a jar and the opener handling them is
        # installed as the default opener of urllib2
        self._cookie = cookielib.LWPCookieJar()
        cookie_processor = urllib2.HTTPCookieProcessor(self._cookie)
        urllib2.install_opener(urllib2.build_opener(cookie_processor))


    def initialize(self):
        """
        Announce the Stage6 location on the message bus, read the
        configuration options and start the login.

        @return: the result of deferring the login to a thread
        @rtype:  L{twisted.internet.defer.Deferred}
        """
        location = "stage6:///"
        action_type = InternetLocationMessage.ActionType.LOCATION_ADDED
        msg = InternetLocationMessage(action_type, "Stage6", 'stage6',
                                      location,
                                      media_types=['video',],
                                      theme_icon='stage6')
        common.application.bus.send_message(msg)

        self._more_pages = self.config.get('pages', 0)
        self._max_pages = self.config.get('max_pages', 99)

        uris = self.config.get('certain_uris', [])
        if isinstance(uris, list):
            # The URIs are stored without a label for now
            for uri in uris:
                self._certain_uris[uri] = None
            # TODO: read the label of each of these URIs from the
            # configuration section named after it
        else:
            self.warning("Could not read configuration options certain_uris. "
                         "Maybe it is not a list?")

        # The login does network requests and should not block
        return threads.deferToThread(self._logIn)


    def clean(self):
        """
        Clean up the provider: only the clean up of the base class is done.
        Writing the 'certain_uris' back to the configuration is disabled
        (see the commented line below).
        """
#        self.config.set('certain_uris', self._certain_uris)
        MediaProvider.clean(self)


    def _logIn(self):

        email = self.config.get('email', '')
        pw = self.config.get('password', '')

        if email != -1 and pw != '':
            
            self.debug("Loggin in with %s:%s" % (email, pw))
            data = urllib.urlencode({'email' : email,
                                     'password' : pw,
                                     'account' : 'true'})
            req = urllib2.Request('http://www.stage6.com/users/login/login-post',
                                     data)

            try:
                handler = urllib2.urlopen(req)
            except urllib2.URLError, u:
                self.warning("Could not log into stage6 server: %s" % u)
                return
            else:
                res = handler.read()
                if res.find('Log out') != -1:
                    self.debug("Logged In")
                    self._loggedIn = True
                    return

            self.warning("Could not log in. Maybe you should check your login"
                            " infromations")


    def scannable_uri_schemes__get(self):
        """
        Return the URI schemes to scan: none, since no media scanning is
        needed, the metadata is provided by this component itself.

        @rtype: dict
        """
        no_schemes = {}
        return no_schemes

    def supported_uri_schemes__get(self):
        """
        Return the supported URI schemes: only 'stage6', mapped to 0.

        @rtype: dict
        """
        schemes = {'stage6': 0}
        return schemes

    def get_media_type(self, uri):
        """
        Queue _get_media_type() for uri in the deferred actions manager.

        @return: what the manager returns for the queued action
                 (presumably a deferred -- confirm in deferred_action)
        """
        queued = self._def_action.insert_action(0, self._get_media_type, uri)
        return queued

    def _get_media_type(self, uri):
        """
        Tell whether uri is a video or a directory.

        @return: a dictionary with the 'file_type' ('video' or 'directory')
                 and an empty 'mime_type'
        @rtype:  dict
        """
        # Only the URIs starting with the stage6 video domain name are
        # videos, all the others are directories
        file_type = 'directory'
        if repr(uri).startswith(self.VIDEOS_URL):
            file_type = 'video'

        return {'file_type' : file_type,
                'mime_type' : ''}

    def is_directory(self, uri):
        """
        Queue _is_directory() for uri in the deferred actions manager.

        @return: what the manager returns for the queued action
                 (presumably a deferred -- confirm in deferred_action)
        """
        queued = self._def_action.insert_action(0, self._is_directory, uri)
        return queued

    def _is_directory(self, uri):
        """
        Tell whether uri is a directory, that is everything which does not
        start with the stage6 video domain.

        @rtype: bool
        """
        is_video = repr(uri).startswith(self.VIDEOS_URL)
        return not is_video

    def has_children(self, uri):
        """
        Queue _has_children() for uri in the deferred actions manager.

        @return: what the manager returns for the queued action
                 (presumably a deferred -- confirm in deferred_action)
        """
        queued = self._def_action.insert_action(0, self._has_children, uri)
        return queued

    def _has_children(self, uri):
        """
        Tell whether uri has children: every directory is considered to
        have videos linked to it.

        @rtype: bool
        """
        # This already runs as a queued action: the check is done directly,
        # as in _has_children_with_types(), instead of queuing another
        # action through is_directory().
        return self._is_directory(uri)

    def has_children_with_types(self, uri, media_types):
        """
        Queue _has_children_with_types() for uri and media_types in the
        deferred actions manager.

        @return: what the manager returns for the queued action
                 (presumably a deferred -- confirm in deferred_action)
        """
        manager = self._def_action
        return manager.insert_action(0, self._has_children_with_types,
                                     uri, media_types)

    def _has_children_with_types(self, uri, media_types):
        """
        Tell whether uri has children of one of the media_types: only
        videos are provided, so this is the case for the directories when
        'video' is asked for.

        @rtype: bool
        """
        if 'video' not in media_types:
            return False
        return self._is_directory(uri)

    def _get_cached_uri(self, uri, children, add_info):
        """
        Append the cached children of uri to the children list.

        With add_info the (child, info) tuples of the cache are appended,
        otherwise only the children themselves.

        @return: the children list, or None if this URI has not yet
                 been cached
        """
        # The result of testandset() is not checked
        self._mutex.testandset()

        key = repr(uri)
        found = None
        if key in self._cache:
            if add_info:
                children.extend(self._cache[key])
            else:
                children.extend([entry[0] for entry in self._cache[key]])
            found = children

        self._mutex.unlock()

        return found


    def _add_to_cache(self, parent, child, info):
        """
        Store the (child, info) tuple in the cache, in the list kept under
        repr(parent); the list is created for the first child.
        """

        # The result of testandset() is not checked
        self._mutex.testandset()

        key = repr(parent)
        self._cache.setdefault(key, []).append((child, info))

        self._mutex.unlock()


    def get_real_uri(self, uri):
        """
        Convert an internal stage6 URI to the http URI that can be used to
        play the video: only the scheme has to be changed.

        @return: a new URI built from uri with the 'http' scheme
        @rtype:  L{elisa.core.media_uri.MediaUri}
        """
        real_uri = MediaUri(uri)
        real_uri.scheme = 'http'
        self.info("The URI is %s" % real_uri)
        return real_uri


    def _read_url(self, url):
        """
        Read an URI and return its content
        """

        """
        dl = Downloader(url, "stage6.html")
        scheme, host, port, path = client._parse(url)
        reactor.connectTCP(host, port, dl)

        def download_complete(result):
            print "Download Complete."
            reactor.stop()

        def download_error(failure):
            print "Error:", failure.getErrorMessage()
            reactor.stop()

        dl.deferred.addCallback(download_complete).addErrback(download_error)
        """

        try:
            f = urllib2.urlopen(url)
        except urllib2.URLError, u:
            self.warning("Could not connect to stage6-server: %s" % u)
            return None
        return f.read()


    def _retrieve_children(self, uri, list_of_children, add_info=False):
        """
        retrieve the children of uri and fills list

        The path of uri selects what is listed: the main menu ('/'), the
        tags menu ('/tags'), the configured URIs ('/certains'), a list of
        tags ('/tags...'), the watchlist ('/me'), the other pages of a
        listing ('/pager...') or, for anything else, the videos of the
        matching page of the website. Lists of tags and of videos are
        cached.

        @param uri:                     the URI to analyze
        @type uri:                      L{elisa.core.media_uri.MediaUri}
        @param list_of_children:        List where the children will be appended
        @type list_of_children:         list
        @param add_info:                Add also the thumbnails to the list
        @type add_info:                 bool
        @return:                        list_of_children
        @rtype:                         list
        """

        # If the uri requested is in the cache, we return the cached children
        cached = self._get_cached_uri(uri, list_of_children, add_info)
        if cached:
            self.debug('Loaded from cache: %s' % repr(uri))
            return cached

        if uri.path == '/':
            # The main menu
            entries = [
                ('stage6:///videos/', T_(N_("Featured Videos"))),
                ("stage6:///videos/order:hotness", T_(N_("Hottest Videos"))),
                ("stage6:///videos/order:date", T_(N_("Latest Videos"))),
                ("stage6:///videos/order:length", T_(N_("Longest Videos"))),
                ("stage6:///tags", T_(N_("Tags"))),
            ]
            if self._loggedIn:
                entries.append(("stage6:///me", T_(N_("my WatchList"))))
            if len(self._certain_uris):
                entries.append(('stage6:///certains', T_(N_("Certain Uris"))))

            for location, label in entries:
                item = MediaUri(location)
                item.label = label
                list_of_children.append((item, {}))

            return list_of_children

        elif uri.path == '/tags':
            # The menu of the tags
            entries = [
                ('stage6:///tags/order:NoOrder', T_(N_("Featured tags"))),
                ("stage6:///tags/order:hotness", T_(N_("Hottest tags"))),
                ("stage6:///tags/order:date", T_(N_("Latest tags"))),
                ## Does that make any sense?
                ("stage6:///tags/order:length", T_(N_("Largest tags"))),
            ]
            for location, label in entries:
                item = MediaUri(location)
                item.label = label
                list_of_children.append((item, {}))

            return list_of_children

        elif uri.path == '/certains':
            # The URIs of the configuration, with their label if any
            for location, label in self._certain_uris.items():
                item = MediaUri(location)
                if label is not None:
                    item.label = label
                list_of_children.append((item, {}))

        # we have a list of tags
        elif uri.path.startswith('/tags'):
            # What follows '/tags' is appended to the URI of every tag
            order = uri.path[5:]
            # NOTE(review): the whole path, '/tags' included, is appended
            # to TAGS_URL -- confirm this is the intended URL
            to_parse = self._read_url(self.TAGS_URL + uri.path)
            if not to_parse:
                ## An error happened
                return list_of_children

            # We add to the children list a MediaUri representing each tag
            for tag in StageParser(to_parse).get_tags():
                tag_uri = MediaUri("stage6:///videos/tag:%s%s" % (tag, order))
                tag_uri.label = tag

                if add_info:
                    list_of_children.append((tag_uri, {}))
                else:
                    list_of_children.append(tag_uri)
                # Cache the uri
                self._add_to_cache(uri, tag_uri, {})

        elif uri.path == "/me":
            url = "http://stage6.divx.com/user/any/watchlist/"
            to_parse = self._read_url(url)
            if not to_parse:
                ## An error happened
                return list_of_children

            for entry in StageParser(to_parse).get_watchlist():
                item = MediaUri("stage6://%s" % entry['href'])
                item.label = entry['label']
                img = MediaUri(entry['img'])
                list_of_children.append((item, {'default_image' : img}))

        elif uri.path.startswith('/pager'):
            pages = int(uri.get_param('pages', 0))
            # FIXME: how could this be done i18n ?
            # The page numbers are padded to the width of the last one
            pages_string = "Page %%%d.d" % len(str(pages))
            # Remove the leading '/pager/'
            path = uri.path[7:]
            # The first page is the listing the pager was found in
            for page_n in range(2, pages + 1):
                page = MediaUri("stage6:///%s?page=%s" %
                                (path, page_n))
                page.label = pages_string % page_n
                list_of_children.append((page, {}))

        else:
            path = uri.path[1:] # Remove first slash
            page = uri.get_param('page', 1)
            url_path = "%s%s?page=%s" % (self.MAIN_URL, path, page)
            self.debug("Asking stage6 for: %s" % url_path)
            # download HTML page and parse it to retrieve the video list
            to_parse = self._read_url(url_path)
            if not to_parse:
                self.warning("Reading didn't work")
                ## An error happened
                return list_of_children
            parser = StageParser(to_parse)
            videos = parser.get_videos()
            self.debug("Found %s videos on that page" % len(videos))

            # The paging is only done when enabled in the configuration.
            # The number of pages is compared with integers below, so it
            # is converted whatever the parser returns.
            pages = 0
            if self._more_pages:
                try:
                    pages = int(parser.get_pages())
                except (TypeError, ValueError):
                    pages = 0

            # We add to the children list a MediaUri representing each video
            for video_id, title, thumbnail, length in videos:
                video = MediaUri(self.VIDEOS_URL + video_id + "/.avi")
                # set the uri label to the name of the video
                video.label = unquote(title.decode("utf-8"))
                if add_info:
                    # Add the thumbnail url and the length to the info dict
                    info = DictObservable()
                    if thumbnail.startswith('http'):
                        info['default_image'] = MediaUri(thumbnail)
                    if length is not None:
                        info['length'] = length
                    list_of_children.append((video, info))
                else:
                    info = {}
                    list_of_children.append(video)
                # Cache the video under the URI of the listing; the
                # thumbnail has its own name so that uri stays the parent
                self._add_to_cache(uri, video, info)

            # page is the integer 1 only when uri has no 'page' parameter,
            # which is the case for the first page of a listing
            if pages > 1 and page == 1:
                if pages > self._max_pages:
                    pages = self._max_pages

                pager = MediaUri("stage6:///pager/%s?pages=%s"
                                 % (path, pages))
                pager.label = T_(N_("Other Pages"))
                list_of_children.append((pager, {}))
                # Cached as well, else the listing would lose its pager
                # when it is loaded from the cache
                self._add_to_cache(uri, pager, {})

        return list_of_children

    def get_direct_children(self, uri, children_with_info):
        """
        Queue _retrieve_children() for uri in the deferred actions manager;
        the children are appended to children_with_info together with
        their information dict.

        @return: what the manager returns for the queued action
                 (presumably a deferred -- confirm in deferred_action)
        """
        manager = self._def_action
        return manager.insert_action(0, self._retrieve_children, uri,
                                     children_with_info, add_info=True)


    def next_location(self, uri, root=None):
        """
        Queue _next_location() for uri and root in the deferred actions
        manager.

        @return: what the manager returns for the queued action
                 (presumably a deferred -- confirm in deferred_action)
        """
        manager = self._def_action
        return manager.insert_action(0, self._next_location, uri, root=root)

    def _next_location(self, uri, root=None):
        """
        Return the location following uri in the cached listings.

        Only the cache is searched: the listings whose parent URI starts
        with root are looked through for uri, and the child cached right
        after it is returned.

        @param uri:  the URI to find the successor of
        @param root: the URI the search is limited to; 'stage6:///' when
                     not given
        @return:     the next URI, or None when uri is not cached under
                     root or is the last child of its listing
        @rtype:      L{elisa.core.media_uri.MediaUri} or None
        """
        # The keys of the cache are the repr() of the parent URIs
        if not root:
            root_str = repr(MediaUri(u'stage6:///'))
        else:
            root_str = repr(root)

        to_find = repr(uri)
        for parent, entries in self._cache.iteritems():
            # look if it is a child of root
            if not parent.startswith(root_str):
                continue
            for index, entry in enumerate(entries):
                # The entries of the cache are (child, info) tuples
                if repr(entry[0]) != to_find:
                    continue
                # That is our uri: return the one following it, if any
                if index + 1 < len(entries):
                    return MediaUri(entries[index + 1][0])
                break

        return None

    def previous_location(self, uri):
        """
        Return the location preceding uri: always None, going backwards
        is not implemented.
        """
        return None

    def uri_is_monitorable(self, uri):
        """
        Tell whether uri can be monitored for changes: always False.

        @rtype: bool
        """
        # We cannot monitor the uri for a change
        return False

    def uri_is_monitored(self, uri):
        """
        Tell whether uri is currently monitored: always False, as no uri
        can be monitored.

        @rtype: bool
        """
        # Always cannot be monitored
        return False

    def open(self, uri, mode=None, block=True):
        """
        Open the video behind uri through the media manager.

        The uri is converted to its http form and the media manager is
        asked to open it with a suitable component.

        @param uri:   the stage6 URI of the video
        @param mode:  passed unchanged to the media manager
        @param block: passed unchanged to the media manager
        @return:      what the media manager returns, or None when uri is
                      a directory or the media manager is not enabled
        """
        # We cannot open directories (tags, menus). The check is done
        # directly: is_directory() goes through the queue of deferred
        # actions, so its return value is presumably not the boolean.
        if self._is_directory(uri):
            return None

        # What we do here is convert the uri in its http form,
        # and ask the media_manager to provide a suitable component
        # - such as GnomeVFSProvider - to do the work for us
        real_uri = self.get_real_uri(uri)
        media_manager = common.application.media_manager
        if not media_manager.enabled:
            return None
        return media_manager.open(real_uri, mode, block)

