edt-parser/parser.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys
import requests
from html.parser import HTMLParser
import caldav, icalendar


# Constants definition
URL = "https://docs.google.com/spreadsheets/u/0/d/e/2PACX-1vQ9yzFLr5mXbIZVK3ucdUZuScAbLoCyPqzHr-5V0aYeCFEz7LuidPdk_EnkkJT-zjemzQQHaKvpeXW2/pubhtml/sheet?headers=false&gid=1619638924"
TIMETABLE = [
    ('08:00','08:00'),
    ('09:00','09:00'),
    ('10:00','10:15'),
    ('11:15','11:15'),
    ('12:15','12:15'),
    ('13:00','13:00'),
    ('13:30','13:30'),
    ('14:30','14:30'),
    ('15:30','15:45'),
    ('16:45','16:45'),
    ('17:45','17:45'),
]


class GoogleSheetsCalParser(HTMLParser):
    """
        Definition of a Google Sheets parser providing a table with each cell
        and its position and size
    """
    def __init__(self):
        self.inTable = False
        self.inCell = False
        self.row = -1
        self.column = -1
        HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        if self.inTable:
            if tag == 'tr':
                self.row += 1
                self.column = -1
            elif tag == 'td':
                self.column += 1
                if not (self.row < 3 or (self.row - 2) % 11 < 2):
                    self.inCell = True
        elif tag == 'tbody':
            self.inTable = True

    def handle_endtag(self, tag):
        if tag == 'td':
            self.inCell = False
        elif tag == 'tbody':
            self.inTable = False

    def handle_data(self, data):
        ...


# Getting the Google Sheet
try:
    r = requests.get(URL)
except ConnectionError as e:
    print(e)
    sys.exit(1)

if r.status_code != 200:
    print(f'Status Code {r.status_code}; could not continue.')
    sys.exit(1)

# Parsing the Sheet
calParser = GoogleSheetsCalParser()
calParser.feed(r.text)

# Transforming the cells into events


# Pushing events to caldav server