Source code for soupstars.examples.economist

"""
Economist
~~~~~~~~~

Extract metadata from Economist index and article pages

"""

import datetime as dt

import sqlalchemy as sa
from sqlalchemy.ext.declarative import declarative_base

from soupstars import Parser, serialize
from soupstars.mixins.sqlalchemy_mixins import SqlalchemyMixin

Base = declarative_base()


class WeeklyIndexPages(Base):
    """
    Example model for storing the results of the parser.

    Each column shares its name with a ``@serialize`` method on
    ``WeeklyIndexPageParser``.
    """

    __tablename__ = "economist_weekly_pages"

    # The url of the index page; used as the primary key.
    base_url = sa.Column(sa.String(), primary_key=True)
    # The date taken from the url of the index page.
    article_date = sa.Column(sa.Date())
    # Status code of the request for the page.
    status_code = sa.Column(sa.Integer())
    # The number of articles found on the page.
    num_articles = sa.Column(sa.Integer())


class WeeklyIndexPageParser(SqlalchemyMixin, Parser):
    """
    Parse metadata from the weekly updated index pages.

    Each method decorated with ``@serialize`` shares its name with a column
    on ``WeeklyIndexPages``.
    """

    # Target model and connection string. Presumably consumed by
    # SqlalchemyMixin when storing results -- confirm against the mixin.
    Model = WeeklyIndexPages
    database_url = "sqlite:///:memory:"

    @serialize
    def base_url(self):
        """The url used."""
        return self.url

    @serialize
    def article_date(self):
        """
        The date of the article, read from the last segment of the url path.

        The segment must be formatted as ``YYYY-MM-DD``; ``strptime`` raises
        ``ValueError`` for anything else.
        """
        # Strip trailing slashes first so a path such as ".../2019-01-05/"
        # yields the date segment instead of an empty string.
        date_string = self.parsed_url.path.rstrip('/').rsplit('/', 1)[-1]
        return dt.datetime.strptime(date_string, '%Y-%m-%d').date()

    @serialize
    def status_code(self):
        """Status code of the request."""
        return self.response.status_code

    @serialize
    def num_articles(self):
        """The number of articles found on the page."""
        # Counts the <span> elements carrying the link-title class.
        title_spans = self.find_all(
            'span', attrs={'class': 'print-edition__link-title'}
        )
        return len(title_spans)
Source code for soupstars.examples.economist

soupstars

Navigation

Related Topics