This repository has been archived on 2020-10-14. You can view files and clone it, but cannot push or open issues or pull requests.
edt-parser/parser.py

81 lines
1.9 KiB
Python
Raw Normal View History

2020-09-20 20:53:24 +02:00
#!/usr/bin/env python
# -*- coding: utf-8 -*-
2020-09-20 21:11:48 +02:00
import sys
2020-09-20 20:53:24 +02:00
import requests
from html.parser import HTMLParser
import caldav, icalendar
2020-09-20 21:11:48 +02:00
# Constants definition
# Address of the published ("pubhtml") Google Sheet fetched by this script.
# The literal is split for readability only; the pieces concatenate to the
# exact original URL.
URL = (
    "https://docs.google.com/spreadsheets/u/0/d/e/"
    "2PACX-1vQ9yzFLr5mXbIZVK3ucdUZuScAbLoCyPqzHr-5V0aYeCFEz7LuidPdk_EnkkJT-zjemzQQHaKvpeXW2"
    "/pubhtml/sheet?headers=false&gid=1619638924"
)
2020-09-20 21:23:23 +02:00
# Pairs of 'HH:MM' strings, listed in ascending order.
# NOTE(review): the two members of a pair differ only for the 10:00 and 15:30
# entries -- presumably (nominal slot start, effective start after a break);
# confirm against the sheet before relying on that meaning.
TIMETABLE = [
    ('08:00', '08:00'),
    ('09:00', '09:00'),
    ('10:00', '10:15'),
    ('11:15', '11:15'),
    ('12:15', '12:15'),
    ('13:00', '13:00'),
    ('13:30', '13:30'),
    ('14:30', '14:30'),
    ('15:30', '15:45'),
    ('16:45', '16:45'),
    ('17:45', '17:45'),
]
2020-09-20 21:11:48 +02:00
class GoogleSheetsCalParser(HTMLParser):
    """Track the current row/column while walking a sheet's ``<tbody>``.

    The parser keeps a zero-based ``row``/``column`` cursor that advances on
    every ``<tr>``/``<td>`` start tag seen while inside a ``<tbody>``, and
    raises ``inCell`` while it is inside a ``<td>`` of a row that is not
    skipped. Cell contents are not collected yet: ``handle_data`` is a stub.

    Attributes:
        inTable: True between ``<tbody>`` and ``</tbody>``.
        inCell: True between ``<td>`` and ``</td>`` of a non-skipped row.
        row: Index of the current ``<tr>`` (-1 before the first one).
        column: Index of the current ``<td>`` in its row (-1 before the first).
    """

    def __init__(self):
        """Create a parser positioned before the first row and column."""
        super().__init__()
        self.inTable = False
        self.inCell = False
        self.row = -1
        self.column = -1

    def handle_starttag(self, tag, attrs):
        """Advance the cursor on ``<tr>``/``<td>``; enter the table on ``<tbody>``.

        ``attrs`` is accepted for the HTMLParser callback signature and is
        not used.
        """
        if not self.inTable:
            # Nothing is tracked until a <tbody> opens.
            if tag == 'tbody':
                self.inTable = True
            return

        if tag == 'tr':
            self.row += 1
            self.column = -1
        elif tag == 'td':
            self.column += 1
            # Rows 0-2 and the first two rows of every 11-row group (counted
            # from row 2) are skipped -- presumably header rows of the sheet
            # layout; TODO confirm against the published sheet.
            skipped = self.row < 3 or (self.row - 2) % 11 < 2
            # Assign instead of only setting True, so a stale flag left by a
            # cell whose </td> was omitted cannot leak into a skipped row.
            self.inCell = not skipped

    def handle_endtag(self, tag):
        """Leave the current cell on ``</td>`` and the table on ``</tbody>``."""
        if tag == 'td':
            self.inCell = False
        elif tag == 'tbody':
            # Leaving the table also ends any cell left open by a missing </td>.
            self.inTable = False
            self.inCell = False

    def handle_data(self, data):
        """Receive a text node; not implemented yet, so the text is ignored."""
        ...
2020-09-20 20:53:24 +02:00
2020-09-20 20:55:27 +02:00
# Getting the Google Sheet
try:
    # A timeout keeps the script from hanging forever on a stalled connection.
    r = requests.get(URL, timeout=30)
except requests.exceptions.RequestException as e:
    # requests raises its own exception hierarchy; the builtin ConnectionError
    # is unrelated to it and would never match a failed request.
    print(e, file=sys.stderr)
    sys.exit(1)

# Any non-200 answer means the sheet could not be retrieved as expected.
if r.status_code != 200:
    print(f'Status Code {r.status_code}; could not continue.', file=sys.stderr)
    sys.exit(1)

# Parsing the Sheet
calParser = GoogleSheetsCalParser()
calParser.feed(r.text)
2020-09-20 20:55:27 +02:00
# Transforming the cells into events
2020-09-20 21:11:48 +02:00
2020-09-20 20:55:27 +02:00
# Pushing events to caldav server
2020-09-20 21:11:48 +02:00