# Source code for soupstars.examples.economist
"""
Economist
~~~~~~~~~
Extract metadata from economist index and article pages
"""
import datetime as dt
import sqlalchemy as sa
from sqlalchemy.ext.declarative import declarative_base
from soupstars import Parser, serialize
from soupstars.mixins.sqlalchemy_mixins import SqlalchemyMixin
Base = declarative_base()
[docs]class WeeklyIndexPages(Base):
"""
Example model for storing the results of the parser
"""
__tablename__ = "economist_weekly_pages"
base_url = sa.Column(sa.String, primary_key=True)
article_date = sa.Column(sa.Date)
status_code = sa.Column(sa.Integer)
num_articles = sa.Column(sa.Integer)
[docs]class WeeklyIndexPageParser(SqlalchemyMixin, Parser):
"""
Parse metadata from the weekly updated index pages
"""
Model = WeeklyIndexPages
database_url = "sqlite:///:memory:"
[docs] @serialize
def base_url(self):
"The url used"
return self.url
[docs] @serialize
def article_date(self):
"The date of the article"
date_string = self.parsed_url.path.split('/')[-1]
return dt.datetime.strptime(date_string, '%Y-%m-%d').date()
[docs] @serialize
def status_code(self):
"Status code of the request"
return self.response.status_code
[docs] @serialize
def num_articles(self):
"The number of articles foudn on the page"
return len(self.find_all('span', attrs={'class': 'print-edition__link-title'}))