There is also a bash script in scripts/install (which we'll later use in our GitHub CI)
Thanks to the ngwidgets InputWebserver base class, the cc_webserver module only needs a few lines:
"""
Created on 2023-11-18
@author: wf
"""
from ngwidgets.input_webserver import InputWebserver
from ngwidgets.webserver import WebserverConfig
from corpus.version import Version
class ConferenceCorpusWebserver(InputWebserver):
    """
    Webserver for the Conference Corpus
    """

    @classmethod
    def get_config(cls) -> WebserverConfig:
        """
        get the configuration for this Webserver

        Returns:
            WebserverConfig: configuration with copyright notice,
            version info and default port 5005
        """
        return WebserverConfig(
            copy_right="(c)2020-2023 Wolfgang Fahl",
            version=Version(),
            default_port=5005,
        )

    def __init__(self):
        """Constructor"""
        # delegate setup to the ngwidgets InputWebserver base class
        InputWebserver.__init__(self, config=ConferenceCorpusWebserver.get_config())
ccServer -s -c
will start your webserver and open a browser to access it. The default port is 5005 and therefore the link http://localhost:5005 should allow you to test the results.
ccServer -s --host 0.0.0.0 --port 80
Will make the server the default webserver on your intranet and http://<your-hostname> will allow you to access your server.
The default menu looks like this:
And the default footer like this:
For this tutorial consider DataSource to be just an example - we only show the title here anyway so you could do something like data_sources=["A","B","C"] to try out the principle in your own application.
The core idea here is that setup_content_div will accept an asynchronous or normal function as a parameter and then call that to get the ui elements for your pages. This way the layout of all pages can be the same - just the content changes.
def setup_home(self):
    """
    load all available data sources and render them as an
    indexed list of labels; a loading message is shown while
    DataSource.getAll() runs and replaced by a timing summary
    """
    status = "loading datasources ..."
    self.loading_msg = ui.html(status)
    timer = Profiler(status, profile=False)
    DataSource.getAll()
    elapsed = timer.time()
    sources = DataSource.sources.values()
    status = f"{len(sources)} datasources loaded in {elapsed*1000:5.0f} msecs"
    self.loading_msg.content = status
    # one label per datasource with a 1-based index
    for index, source in enumerate(sources, start=1):
        ui.label(f"{index}:{source.title}")
async def home(self, _client: Client):
    """
    provide the main content page

    Args:
        _client(Client): the nicegui client (unused)
    """
    await self.setup_content_div(self.setup_home)
Thanks to the ngwidgets InputWebserver base class, the cc_webserver module only needs a few lines:
"""
Created on 2023-11-18
@author: wf
"""
from ngwidgets.input_webserver import InputWebserver
from ngwidgets.webserver import WebserverConfig
from corpus.version import Version
class ConferenceCorpusWebserver(InputWebserver):
    """
    Webserver for the Conference Corpus
    """

    @classmethod
    def get_config(cls) -> WebserverConfig:
        """
        get the configuration for this Webserver

        Returns:
            WebserverConfig: configuration with copyright notice,
            version info and default port 5005
        """
        copy_right = "(c)2020-2023 Wolfgang Fahl"
        config = WebserverConfig(
            copy_right=copy_right, version=Version(), default_port=5005
        )
        return config

    def __init__(self):
        """Constructor"""
        # delegate setup to the ngwidgets InputWebserver base class
        InputWebserver.__init__(self, config=ConferenceCorpusWebserver.get_config())
ccServer -s -c
will start your webserver and open a browser to access it. The default port is 5005 and therefore the link http://localhost:5005 should allow you to test the results.
ccServer -s --host 0.0.0.0 --port 80
Will make the server the default webserver on your intranet and http://<your-hostname> will allow you to access your server.
For this tutorial consider DataSource to be just an example - we only show the title here anyway so you could do something like data_sources=["A","B","C"] to try out the principle in your own application.
The core idea here is that setup_content_div will accept an asynchronous or normal function as a parameter and then call that to get the ui elements for your pages. This way the layout of all pages can be the same - just the content changes.
def setup_home(self):
    """
    first load all data sources then
    show a table of these

    a loading message is displayed while DataSource.getAll() runs
    and is replaced with a timing summary afterwards
    """
    msg="loading datasources ..."
    # placeholder that is updated once loading has finished
    self.loading_msg=ui.html(msg)
    profiler=Profiler(msg,profile=False)
    DataSource.getAll()
    elapsed=profiler.time()
    data_sources=DataSource.sources.values()
    msg=f"{len(data_sources)} datasources loaded in {elapsed*1000:5.0f} msecs"
    self.loading_msg.content=msg
    # one label per datasource with a 1-based index
    for index,source in enumerate(data_sources,start=1):
        ui.label(f"{index}:{source.title}")
    pass
async def home(self, _client: Client):
    """
    provide the main content page

    Args:
        _client(Client): the nicegui client (unused)
    """
    await(self.setup_content_div(self.setup_home))
Issue 59 - refactor eventseries RESTFul service to nicegui/FastAPI
Note how we separate the concerns and have a function to get the eventSeries as a dict of lists of dicts and then a conversion to the different formats that is going to be used by the Webserver to provide the results in a RESTful way.
import io
import re
import pandas as pd
from dataclasses import dataclass, asdict
from fastapi import Response
from fastapi.responses import JSONResponse,FileResponse
from tabulate import tabulate
from typing import List
from spreadsheet.spreadsheet import ExcelDocument
from corpus.lookup import CorpusLookup
from corpus.datasources.openresearch import OREvent, OREventSeries
from corpus.eventseriescompletion import EventSeriesCompletion
class EventSeriesAPI():
    """
    API service for event series data

    queries multiple datasources for the events of a series and offers
    Basisklassifikation (bk) filtering, duplicate reduction and conversion
    of the results to different response formats
    """

    def __init__(self, lookup: CorpusLookup):
        """
        construct me

        Args:
            lookup(CorpusLookup): the corpus lookup used to query the datasources
        """
        self.lookup = lookup

    def getEventSeries(self, name: str, bks: str = None, reduce: bool = False) -> dict:
        """
        Query multiple datasources for the given event series

        Args:
            name(str): the name of the event series to be queried
            bks(str): optional comma separated list of allowed Basisklassifikation
                values - used to filter the tibkat records
            reduce(bool): if True deduplicate the tibkat and dblp records by title

        Returns:
            dict: mapping of datasource name to list of event records
        """
        multiQuery = "select * from {event}"
        # NOTE(review): name is interpolated into the SQL directly - this is
        # vulnerable to SQL injection if name ever comes from untrusted input
        idQuery = f"""select source,eventId from event where lookupAcronym LIKE "{name} %" order by year desc"""
        dictOfLod = self.lookup.getDictOfLod4MultiQuery(multiQuery, idQuery)
        if bks:
            # only the tibkat records carry bk values
            self.filterForBk(dictOfLod.get("tibkat"), bks.split(","))
        if reduce:
            for source in ["tibkat", "dblp"]:
                sourceRecords = dictOfLod.get(source)
                if sourceRecords:
                    dictOfLod[source] = EventSeriesCompletion.filterDuplicatesByTitle(sourceRecords)
        return dictOfLod

    def filterForBk(self, lod: List[dict], allowedBks: List[str]):
        """
        Filters the given list of records in place to only include the records
        with their bk in the given list of allowed bks

        Args:
            lod: list of records to filter (modified in place)
            allowedBks: list of allowed bks; a purely numeric entry such as "54"
                matches any subclass (e.g. "54.84"), a "d.dd"/"dd.dd" entry
                matches exactly, and "null"/"none" keeps records without a bk
        """
        if lod is None or allowedBks is None:
            return
        mainclassBk = set()
        subclassBk = set()
        allowNullValue = False
        for bk in allowedBks:
            if bk.isnumeric():
                mainclassBk.add(bk)
            elif re.fullmatch(r"\d{1,2}\.\d{2}", bk):
                subclassBk.add(bk)
            elif bk.lower() in ("null", "none"):
                allowNullValue = True

        def filterBk(record: dict) -> bool:
            """True if the record is to be kept"""
            keepRecord = False
            bks = record.get("bk")
            if bks is not None:
                # multiple bk values are separated by "⇹"
                bks = set(bks.split("⇹"))
                recordMainclasses = {bk.split(".")[0] for bk in bks}
                if mainclassBk.intersection(recordMainclasses) or subclassBk.intersection(bks):
                    keepRecord = True
            elif allowNullValue:
                keepRecord = True
            return keepRecord

        # slice assignment so the caller's list object is filtered in place
        lod[:] = [record for record in lod if filterBk(record)]

    def generateSeriesSpreadsheet(self, name: str, dictOfLods: dict) -> ExcelDocument:
        """
        generate an Excel document for the given event series records

        Args:
            name(str): name of the series
            dictOfLods: records of the series from different sources

        Returns:
            ExcelDocument: workbook with blank "Event"/"Proceedings" completion
            sheets, one sheet per datasource and the metadata mapping sheets
        """
        spreadsheet = ExcelDocument(name=name)
        # Add completed event sheet and add proceedings sheet
        eventHeader = [
            "item",
            "label",
            "description",
            "Ordinal",
            "OrdinalStr",
            "Acronym",
            "Country",
            "City",
            "Title",
            "Series",
            "Year",
            "Start date",
            "End date",
            "Homepage",
            "dblp",
            "dblpId",
            "wikicfpId",
            "gndId"]
        proceedingsHeaders = ["item", "label", "ordinal", "ordinalStr", "description", "Title", "Acronym",
                              "OpenLibraryId", "oclcId", "isbn13", "ppnId", "gndId", "dblpId", "doi", "Event",
                              "publishedIn"]
        eventRecords = []
        for lod in dictOfLods.values():
            eventRecords.extend(lod)
        # derive the (year, ordinal) skeleton of the complete series
        completedBlankEvent = EventSeriesCompletion.getCompletedBlankSeries(eventRecords)
        eventSheetRecords = []
        proceedingsRecords = []
        for year, ordinal in completedBlankEvent:
            eventSheetRecords.append({**{k: None for k in eventHeader}, "Ordinal": ordinal, "Year": year})
            proceedingsRecords.append({**{k: None for k in proceedingsHeaders}, "ordinal": ordinal})
        if not eventSheetRecords:
            # ensure both sheets exist even without any completed events
            eventSheetRecords = [{k: None for k in eventHeader}]
            proceedingsRecords = [{k: None for k in proceedingsHeaders}]
        spreadsheet.addTable("Event", eventSheetRecords)
        spreadsheet.addTable("Proceedings", proceedingsRecords)
        # one sheet per datasource plus the metadata mapping sheets
        for lods in [dictOfLods, asdict(MetadataMappings())]:
            for sheetName, lod in lods.items():
                if isinstance(lod, list):
                    lod.sort(key=lambda record: 0 if record.get('year', 0) is None else record.get('year', 0))
                    spreadsheet.addTable(sheetName, lod)
        return spreadsheet

    async def convertToRequestedFormat(self, name: str, dictOfLods: dict, markup_format: str = "json"):
        """
        Converts the given dicts of lods to the requested markup format.

        Supported formats: json, html, excel, pd_excel, various tabulate formats.
        Default format: json

        Args:
            name(str): name of the series - used e.g. as download filename
            dictOfLods: data to be converted
            markup_format(str): the target format

        Returns:
            Response: a FastAPI response in the requested format
        """
        fmt = markup_format.lower()
        if fmt == "excel":
            # Custom Excel spreadsheet generation
            spreadsheet = self.generateSeriesSpreadsheet(name, dictOfLods)
            excel_bytes = spreadsheet.toBytesIO().getvalue()
            # fix: FileResponse expects a filesystem path - for in-memory
            # content a plain Response with a content-disposition header is used
            return Response(
                content=excel_bytes,
                media_type="application/vnd.ms-excel",
                headers={"Content-Disposition": f'attachment; filename="{name}.xlsx"'},
            )
        elif fmt == "pd_excel":
            # Pandas style Excel spreadsheet generation
            # fix: the previous dict comprehension called .items() on lists;
            # flatten all datasource records into one DataFrame instead
            records = [record for lod in dictOfLods.values() for record in lod]
            df = pd.DataFrame(records)
            excel_io = io.BytesIO()
            with pd.ExcelWriter(excel_io, engine="xlsxwriter") as writer:
                df.to_excel(writer, sheet_name=name)
            excel_io.seek(0)
            return Response(
                content=excel_io.getvalue(),
                media_type="application/vnd.ms-excel",
                headers={"Content-Disposition": f'attachment; filename="{name}.xlsx"'},
            )
        elif fmt == "json":
            # Direct JSON response
            return JSONResponse(content=dictOfLods)
        else:
            # Using tabulate for other formats (including HTML)
            # fix: tabulate with headers="keys" expects a list of dicts,
            # not a list of lists - flatten the records first
            records = [record for lod in dictOfLods.values() for record in lod]
            tabulated_content = tabulate(records, headers="keys", tablefmt=markup_format)
            media_type = "text/html" if fmt == "html" else "text/plain"
            return Response(content=tabulated_content, media_type=media_type)
@dataclass
class MetadataMappings:
    """
    Spreadsheet metadata mappings

    maps spreadsheet columns to Wikidata properties (WikidataMapping) and to
    Semantic MediaWiki / OpenResearch template parameters (SmwMapping)
    """

    # column to Wikidata property mapping records
    WikidataMapping: list = None
    # column to SMW template parameter mapping records
    SmwMapping: list = None

    def __init__(self):
        # NOTE(review): this explicit __init__ replaces the dataclass
        # generated one; asdict() still works since the fields are declared
        self.WikidataMapping = [
            # event series level mappings
            {'Entity': 'Event series', 'Column':None, 'PropertyName': 'instanceof', 'PropertyId': 'P31', 'Value': 'Q47258130'},
            {'Entity': 'Event series', 'Column': 'Acronym', 'PropertyName': 'short name', 'PropertyId': 'P1813', 'Type': 'text'},
            {'Entity': 'Event series', 'Column': 'Title', 'PropertyName': 'title', 'PropertyId': 'P1476', 'Type': 'text'},
            {'Entity': 'Event series', 'Column': 'Homepage', 'PropertyName': 'official website', 'PropertyId': 'P856', 'Type': 'url'},
            # event level mappings
            {'Entity': 'Event', 'Column':None, 'PropertyName': 'instanceof', 'PropertyId': 'P31', 'Value': 'Q2020153'},
            {'Entity': 'Event', 'Column': 'Series', 'PropertyName': 'part of the series', 'PropertyId': 'P179'},
            {'Entity': 'Event', 'Column': 'Ordinal', 'PropertyName': 'series ordinal', 'PropertyId': 'P1545', 'Type': 'string', 'Qualifier': 'part of the series'},
            {'Entity': 'Event', 'Column': 'Acronym', 'PropertyName': 'short name', 'PropertyId': 'P1813', 'Type': 'text'},
            {'Entity': 'Event', 'Column': 'Title', 'PropertyName': 'title', 'PropertyId': 'P1476', 'Type': 'text'},
            {'Entity': 'Event', 'Column': 'Country', 'PropertyName': 'country', 'PropertyId': 'P17', 'Lookup': 'Q3624078'},
            {'Entity': 'Event', 'Column': 'City', 'PropertyName': 'location', 'PropertyId': 'P276', 'Lookup': 'Q515'},
            {'Entity': 'Event', 'Column': 'Start date', 'PropertyName': 'start time', 'PropertyId': 'P580', 'Type': 'date'},
            {'Entity': 'Event', 'Column': 'End date', 'PropertyName': 'end time', 'PropertyId': 'P582', 'Type': 'date'},
            {'Entity': 'Event', 'Column': 'gndId', 'PropertyName': 'GND ID', 'PropertyId': 'P227', 'Type': 'extid'},
            {'Entity': 'Event', 'Column': 'dblpUrl', 'PropertyName': 'describedAt', 'PropertyId': 'P973', 'Type': 'url'},
            {'Entity': 'Event', 'Column': 'Homepage', 'PropertyName': 'official website', 'PropertyId': 'P856', 'Type': 'url'},
            {'Entity': 'Event', 'Column': 'wikicfpId', 'PropertyName': 'WikiCFP event ID', 'PropertyId': 'P5124', 'Type': 'extid'},
            {'Entity': 'Event', 'Column': 'dblpId', 'PropertyName': 'DBLP event ID', 'PropertyId': 'P10692', 'Type': 'extid'},
            # proceedings level mappings
            {'Entity': 'Proceedings', 'Column':None, 'PropertyName': 'instanceof', 'PropertyId': 'P31', 'Value': 'Q1143604'},
            {'Entity': 'Proceedings', 'Column': 'Acronym', 'PropertyName': 'short name', 'PropertyId': 'P1813', 'Type': 'text'},
            {'Entity': 'Proceedings', 'Column': 'Title', 'PropertyName': 'title', 'PropertyId': 'P1476', 'Type': 'text'},
            {'Entity': 'Proceedings', 'Column': 'OpenLibraryId', 'PropertyName': 'Open Library ID', 'PropertyId': 'P648', 'Type': 'extid'},
            {'Entity': 'Proceedings', 'Column': 'ppnId', 'PropertyName': 'K10plus PPN ID', 'PropertyId': 'P6721', 'Type': 'extid'},
            {'Entity': 'Proceedings', 'Column': 'Event', 'PropertyName': 'is proceedings from', 'PropertyId': 'P4745', 'Lookup': 'Q2020153'},
            {'Entity': 'Proceedings', 'Column': 'publishedIn', 'PropertyName': 'published in', 'PropertyId': 'P1433', 'Lookup': 'Q39725049'},
            {'Entity': 'Proceedings', 'Column': 'oclcId', 'PropertyName': 'OCLC work ID','PropertyId': 'P5331', 'Type': 'extid'},
            {'Entity': 'Proceedings', 'Column': 'isbn13', 'PropertyName': 'ISBN-13', 'PropertyId': 'P212', 'Type': 'extid'},
            {'Entity': 'Proceedings', 'Column': 'doi', 'PropertyName': 'DOI', 'PropertyId': 'P356', 'Type': 'extid'},
            {'Entity': 'Proceedings', 'Column': 'dblpId', 'PropertyName': 'DBLP event ID', 'PropertyId': 'P10692', 'Type': 'extid'},
        ]
        # derive the SMW mapping from the OpenResearch property lookup lists
        self.SmwMapping = [
            *[{"Entity":"Event",
               "Column":r.get("templateParam"),
               "PropertyName":r.get("name"),
               "PropertyId":r.get("prop"),
               "TemplateParam": r.get("templateParam")
               } for r in OREvent.propertyLookupList
              ],
            *[{"Entity": "Event series",
               "Column": r.get("templateParam"),
               "PropertyName": r.get("name"),
               "PropertyId": r.get("prop"),
               "TemplateParam": r.get("templateParam")
               } for r in OREventSeries.propertyLookupList
              ]
        ]
see also http://cc2.bitplan.com/docs
@app.get('/eventseries/{name}')
async def get_eventseries(name: str, bks: str = "", reduce: bool = False, format: str = "json"):
    """
    RESTful endpoint providing the given event series in the requested format

    Args:
        name(str): the acronym of the event series
        bks(str): optional comma separated Basisklassifikation filter
        reduce(bool): if True deduplicate tibkat/dblp records
        format(str): the target markup format (default: json)
    """
    # Use the parameters directly in the API calls
    event_series_dict = self.event_series_api.getEventSeries(name, bks, reduce)
    # fix: convertToRequestedFormat is a coroutine - without await the
    # handler returned a coroutine object instead of a Response
    response = await self.event_series_api.convertToRequestedFormat(name, event_series_dict, format)
    return response
"""
Created on 19.11.2023
@author: wf
"""
from corpus.web.cc_webserver import ConferenceCorpusWebserver
from corpus.web.cc_cmd import WebserverCmd
from ngwidgets.webserver_test import WebserverTest
import json
class RestFulApiTest(WebserverTest):
    """
    test the conference corpus RESTFul APIs
    """

    def setUp(self, debug=False, profile=True):
        """
        set up the webserver test for the ConferenceCorpusWebserver
        """
        cmd_class = WebserverCmd
        server_class = ConferenceCorpusWebserver
        WebserverTest.setUp(self, server_class, cmd_class, debug=debug, profile=profile)

    def test_event_series_api(self):
        """
        Test getting event series information from the API.

        This test method sends a GET request to the `/eventseries/{name}` endpoint
        to retrieve data about a specific event series. The test verifies that the
        response is a valid JSON object and checks its structure and contents as needed.
        The specific event series tested here is identified by the acronym "AISI".
        See: https://github.com/WolfgangFahl/ConferenceCorpus/issues/59
        """
        test_series = [
            ("AISI", ['confref', 'gnd', 'wikicfp', 'tibkat', 'wikidata'], 21)
        ]
        debug = self.debug
        # debug = True
        for name, expected_sources, expected_event_count in test_series:
            path = f"/eventseries/{name}"
            json_dict = self.get_json(path)
            json_str = json.dumps(json_dict, indent=2, default=str)
            sources = list(json_dict.keys())
            event_count = 0
            if debug:
                print("JSON Response:\n", json_str)
                print(f"Sources: {sources}")
            for source, events in json_dict.items():
                if debug:
                    print(f" {source}:{len(events)}")
                event_count += len(events)
            if debug:
                print(f"Event Count: {event_count}")
            # fix: assertEquals is a deprecated alias removed in Python 3.12
            self.assertEqual(expected_sources, sources)
            # more events may be added over time - only check the lower bound
            self.assertTrue(expected_event_count <= event_count)

    def test_event_series_api_formats(self):
        """
        test different markup formats
        """
        test_series = [
            ("AISI", "")
        ]
        debug = True
        for name, needle in test_series:
            for markup_format in ["github"]:
                path = f"/eventseries/{name}?format={markup_format}"
                markup = self.get_html(path)
                if debug:
                    print(f"{markup_format}:")
                    print(markup)
scripts/test --venv -tn test_restful_apis
import json
from dataclasses import asdict
from spreadsheet.googlesheet import GoogleSheet
from corpus.web.eventseries import MetadataMappings, EventSeriesAPI
from tests.datasourcetoolbox import DataSourceTest
from corpus.lookup import CorpusLookup
class TestEventSeriesAPI(DataSourceTest):
    """
    tests EventSeriesBlueprint
    """

    @classmethod
    def setUpClass(cls) -> None:
        """
        create a single corpus lookup shared by all tests
        """
        super(TestEventSeriesAPI, cls).setUpClass()
        cls.lookup = CorpusLookup()

    def setUp(self, debug=False, profile=True, timeLimitPerTest=10.0):
        """
        per-test setup reusing the class level lookup
        """
        DataSourceTest.setUp(self, debug=debug, profile=profile, timeLimitPerTest=timeLimitPerTest)
        self.lookup = TestEventSeriesAPI.lookup

    def testLookup(self):
        """
        check the lookup
        """
        self.assertIsNotNone(self.lookup)
        debug = self.debug
        datasource_count = len(self.lookup.eventCorpus.eventDataSources)
        if debug:
            print(f"found {datasource_count} datasources")
        self.assertTrue(datasource_count > 3)

    def test_extractWikidataMapping(self):
        """
        extracts wikidata metadata mapping from given google docs url
        """
        url = "https://docs.google.com/spreadsheets/d/1-6llZSTVxNrYH4HJ0DotMjVu9cTHv2WnT_thfQ-3q14"
        gs = GoogleSheet(url)
        gs.open(["Wikidata"])
        lod = gs.asListOfDicts("Wikidata")
        debug = self.debug
        # debug = True
        if debug:
            print(f"found {len(lod)} events")
            print(json.dumps(lod, indent=2))
        self.assertEqual(23, len(lod))

    def test_MetadataMapping(self):
        """
        test the metadata mapping
        """
        mapping = MetadataMappings()
        debug = self.debug
        # debug = True
        mapping_dict = asdict(mapping)
        if debug:
            print(json.dumps(mapping_dict, indent=2))
        self.assertIn("WikidataMapping", mapping_dict)

    def testGetEventSeries(self):
        """
        tests the multiquerying of event series over api

        some 17 secs for test
        """
        es_api = EventSeriesAPI(self.lookup)
        dict_of_lods = es_api.getEventSeries(name="WEBIST")
        debug = self.debug
        # debug = True
        if debug:
            print(json.dumps(dict_of_lods, indent=2, default=str))
        self.assertIn("confref", dict_of_lods)
        self.assertTrue(len(dict_of_lods["confref"]) > 15)

    def test_getEventSeriesBkFilter(self):
        """
        tests getEventSeries bk filter
        see https://github.com/WolfgangFahl/ConferenceCorpus/issues/55
        """
        bks_list = ["85.20", "54.65,54.84", "54.84,85"]
        es_api = EventSeriesAPI(self.lookup)
        for bks in bks_list:
            res = es_api.getEventSeries(name="WEBIST", bks=bks)
            self.assertIn("tibkat", res)
            bksPerRecord = [record.get("bk") for record in res.get("tibkat")]
            expectedBks = set(bks.split(","))
            for rawBks in bksPerRecord:
                self.assertIsNotNone(rawBks)
                # fix: do not shadow the loop variable bks here
                recordBks = set(rawBks.split("⇹"))
                # also allow matching on the main classes e.g. "54" for "54.84"
                recordBks = recordBks.union({bk.split(".")[0] for bk in recordBks})
                self.assertTrue(recordBks.intersection(expectedBks))

    def test_filterForBk(self):
        """
        tests filterForBk
        """
        # (record bk values, bk filter, expected number of surviving records)
        testMatrix = [
            (['54.84⇹85.20', '54.84⇹85.20', '54.84⇹81.68⇹85.20⇹88.03⇹54.65', '85.20'], ["54"], 3),
            (['54.84⇹85.20', '54.84⇹85.20', '54.84⇹81.68⇹85.20⇹88.03⇹54.65', '85.20'], ["54", "85"], 4),
            (['54.84⇹85.20', '54.84⇹85.20', '54.84⇹81.68⇹85.20⇹88.03⇹54.65', '85.20'], ["85.20"], 4),
            (['54.84⇹85.20', '54.84⇹85.20', '54.84⇹81.68⇹85.20⇹88.03⇹54.65', '85.20'], ["02"], 0),
            (['54.84⇹85.20', '54.84⇹85.20', '54.84⇹81.68⇹85.20⇹88.03⇹54.65', '85.20'], ["81.68", "88.03"], 1),
            (['54.84⇹85.20', '54.84⇹85.20', None, '85.20'], ["54"], 2),
            (['54.84⇹85.20', '54.84⇹85.20', None, '85.20'], ["54", "null"], 3),
            (['54.84⇹85.20', '54.84⇹85.20', None, '85.20'], ["null"], 1),
            (['54.84⇹85.20', '54.84⇹85.20', None, '85.20'], ["none"], 1),
        ]
        es_api = EventSeriesAPI(self.lookup)
        for recordData, bkFilter, expectedNumberOfRecords in testMatrix:
            lod = [{"bk": bk} for bk in recordData]
            es_api.filterForBk(lod, bkFilter)
            self.assertEqual(
                len(lod),
                expectedNumberOfRecords,
                f"Tried to filter for {bkFilter} and expected {expectedNumberOfRecords} but filter left {len(lod)} in the list"
            )

    def testTibkatReducingRecords(self):
        """
        tests deduplication of the tibkat records if reduce parameter is set
        """
        name = "AAAI"
        # thresholds: at least 400 records without reduction,
        # fewer than 50 with reduction
        expected = {
            False: 400,
            True: 50
        }
        es_api = EventSeriesAPI(lookup=self.lookup)
        for reduce in (True, False):
            with self.subTest(msg=f"Testing with reduce={reduce}", testParam=reduce):
                res = es_api.getEventSeries(name=name, reduce=reduce)
                self.assertIn("tibkat", res)
                tibkat_count = len(res.get("tibkat"))
                should = expected[reduce] <= tibkat_count
                if reduce:
                    # with reduction the count must stay below the threshold
                    should = not should
                self.assertTrue(should)
ConferenceCorpus % scripts/test --venv -tn test_eventseries_api
No new changes in tests or corpus directories.
Starting test testGetEventSeries ... with debug=False ...
test testGetEventSeries ... with debug=False took 8.5 s
.Starting test testLookup ... with debug=False ...
test testLookup ... with debug=False took 0.0 s
.Starting test testTibkatReducingRecords ... with debug=False ...
test testTibkatReducingRecords ... with debug=False took 5.2 s
.Starting test test_MetadataMapping ... with debug=False ...
test test_MetadataMapping ... with debug=False took 0.0 s
.Starting test test_extractWikidataMapping ... with debug=False ...
test test_extractWikidataMapping ... with debug=False took 0.5 s
.Starting test test_filterForBk ... with debug=False ...
test test_filterForBk ... with debug=False took 0.0 s
.Starting test test_getEventSeriesBkFilter ... with debug=False ...
test test_getEventSeriesBkFilter ... with debug=False took 2.9 s
.
----------------------------------------------------------------------
Ran 7 tests in 17.202s
OK
There is a scripts/test script that runs the python unit tests from the test directory:
There is a scripts/test script that runs the python unit tests from the test directory:
Usage: scripts/test [OPTIONS]
Options:
-b, --background Run tests in the background and log output.
-d, --debug Show environment for debugging.
-g, --green Run tests using the green test runner.
-h, --help Display this help message.
-p, --python Specify the Python interpreter to use.
-tn, --test_name Run only the specified test module.
--venv Use a virtual environment for testing.
Example:
scripts/test --python python3.10 --background
Below is a call that runs a single test in a venv environment
scripts/test --venv -tn testPainScale
No new changes in tests or corpus directories.
Starting test testPainImages ... with debug=False ...
test testPainImages ... with debug=False took 0.7 s
.
----------------------------------------------------------------------
Ran 1 test in 0.713s
OK
Ran 161 tests in 1041.419s
FAILED (errors=18, skipped=8)
We use github workflows for our CI with two workflow yaml files:
The build workflow uses a matrix of python versions and operating systems. During the migration phase we only use Ubuntu and Python 3.10 to speed up the actions handling. In our project's actions list we can see whether our CI works or still fails:
To serve from an Apache Server we need to start the service (e.g. on reboot) and make it available via an apache configuration. We also have to make sure the service is available on the internet and findable by the DNS system.
My services are hosted via hosteurope where I can access my DNS entries via its KIS system
By adding cc2 as a cname for on.bitplan.com http://cc2.bitplan.com now points to http://on.bitplan.com
Our server is an Ubuntu 22.04 LTS machine. The configuration is at
/etc/apache2/sites-available/conferencecorpus.conf
My services are hosted via hosteurope where I can access my DNS entries via its KIS system
By adding cc2 as a cname for on.bitplan.com http://cc2.bitplan.com now points to http://on.bitplan.com
Our server is an Ubuntu 22.04 LTS machine. The configuration is at
/etc/apache2/sites-available/conferencecorpus.conf
<VirtualHost *:80 >
	# Reverse proxy exposing the local Conference Corpus webserver
	# (port 5005) as http://cc2.bitplan.com
	#
	# The ServerName directive sets the request scheme, hostname and port that
	# the server uses to identify itself. This is used when creating
	# redirection URLs. In the context of virtual hosts, the ServerName
	# specifies what hostname must appear in the request's Host: header to
	# match this virtual host. For the default virtual host (this file) this
	# value is not decisive as it is used as a last resort host regardless.
	# However, you must set it for any further virtual host explicitly.
	ServerName cc2.bitplan.com
	ServerAdmin webmaster@bitplan.com
	#DocumentRoot /var/www/html
	# Available loglevels: trace8, ..., trace1, debug, info, notice, warn,
	# error, crit, alert, emerg.
	# It is also possible to configure the loglevel for particular
	# modules, e.g.
	#LogLevel info ssl:warn
	ErrorLog ${APACHE_LOG_DIR}/cc_error.log
	CustomLog ${APACHE_LOG_DIR}/cc.log combined
	# For most configuration files from conf-available/, which are
	# enabled or disabled at a global level, it is possible to
	# include a line for only one particular virtual host. For example the
	# following line enables the CGI configuration for this host only
	# after it has been globally disabled with "a2disconf".
	#Include conf-available/serve-cgi-bin.conf
	RewriteEngine On
	# proxy websocket upgrade requests separately (needed for NiceGUI's
	# live client/server communication)
	RewriteCond %{HTTP:Upgrade} =websocket [NC]
	RewriteRule /(.*) ws://localhost:5005/$1 [P,L]
	# all other requests are proxied via plain http
	RewriteCond %{HTTP:Upgrade} !=websocket [NC]
	RewriteRule /(.*) http://localhost:5005/$1 [P,L]
	# make local Conference Corpus webserver available
	# rewrite backend redirect/location headers to the public host
	ProxyPassReverse / http://localhost:5005/
</VirtualHost>
sudo a2ensite conferencecorpus
Enabling site conferencecorpus.
To activate the new configuration, you need to run:
systemctl reload apache2
# we do a complete restart for good measure
service apache2 restart
The server should now in principle be accessible at http://cc2.bitplan.com but will respond with HTTP status code 503 Service Unavailable until we start the webservice
We restart our servers on reboot and every night. You need to adapt your crontab entries to your username
crontab -l | tail -4
# run startup on reboot
@reboot /home/<user>/bin/startup --all
# and every early morning 02:30 h
30 02 * * * /home/<user>/bin/startup --all
The startup script manages Python services, with error handling (`error`) and usage info (`usage`). This example only shows the single service ConferenceCorpus - in our context we have lots of services that are started this way on reboot or demand.
#!/bin/bash
# WF 2023-02-25
# startup jobs for Conference Corpus
# ansi colors
blue='\033[0;34m'
red='\033[0;31m'
green='\033[0;32m'
endColor='\033[0m'
# a colored message
color_msg() {
  # print the given message in the given ansi color
  local color="$1"
  local msg="$2"
  echo -e "${color}${msg}${endColor}"
}
# error
error() {
  # print the given error message in red to stderr and exit with failure
  local msg="$1"
  color_msg "$red" "Error:" 1>&2
  color_msg "$red" "\t$msg" 1>&2
  exit 1
}
# show usage
usage() {
  echo "Usage: $0 [options]"
  echo "Options:"
  echo "-h |--help: Show this message"
  echo "--cc: Start Conference Corpus"
  # fix: --all is handled by the option loop but was missing from the help
  echo "--all: Start all services"
  exit 1
}
# start the given python service
start_python_service() {
  # params: service name, git url, start command, pgrep/pkill process pattern
  local l_name=$1
  local l_giturl=$2
  local l_cmd=$3
  local l_cmd_proc="$4"
  local l_log=$(prepare_logging $l_name)
  update $l_name $l_giturl true
  background_service "$l_cmd" "$l_cmd_proc" $l_log
  # fix: the original called the undefined function "log" here which would
  # fail with "command not found" - use color_msg instead
  color_msg $blue "log is at $l_log"
}
# background service
background_service() {
  # run the given command as a detached background service
  # params:
  #   1: the command to start
  #   2: the pgrep/pkill pattern identifying a running instance
  #   3: the log file for stdout/stderr
  local l_cmd="$1"
  local l_cmd_proc="$2"
  local l_log="$3"
  # list any running instance (exit code 0 if one exists)
  pgrep -fla "$l_cmd_proc"
  if [ $? -eq 0 ]
  then
    # stop the old instance before starting a new one
    pkill -f "$l_cmd_proc"
  fi
  # detach from the terminal; word splitting of l_cmd is intentional
  nohup $l_cmd > "$l_log" 2>&1 &
}
# update the given python project
update() {
  # clone and optionally update/reinstall the given python project
  # params: project name, git url, "true" to pull and reinstall
  local l_name="$1"
  local l_giturl="$2"
  local l_do_update="$3"
  local l_srcroot="$HOME/source/python"
  local l_service_root="$l_srcroot/$l_name"
  if [ ! -d "$l_srcroot" ]
  then
    mkdir -p "$l_srcroot"
  fi
  # fix: guard the cd calls - continuing in the wrong directory would
  # run git/pip in an unintended location
  cd "$l_srcroot" || error "could not cd to $l_srcroot"
  if [ ! -d "$l_service_root" ]
  then
    git clone "$l_giturl" "$l_name"
  fi
  cd "$l_service_root" || error "could not cd to $l_service_root"
  if [ "$l_do_update" == "true" ]
  then
    git pull
    python -m venv .venv
    source .venv/bin/activate
    pip install --upgrade pip
    scripts/install
  fi
}
# prepare logging for the given service
prepare_logging() {
  # create /var/log/<name>/<name>.log owned by the current user
  # and echo the log file path as the "return value"
  local l_name="$1"
  local l_logdir="/var/log/$l_name"
  local l_log="$l_logdir/$l_name.log"
  local l_idn=$(id -un)
  local l_idg=$(id -gn)
  if [ ! -d "$l_logdir" ]
  then
    sudo mkdir -p "$l_logdir"
  fi
  # fix: use the POSIX user:group syntax - the historic user.group form
  # is deprecated and breaks for usernames containing dots
  sudo chown "$l_idn:$l_idg" "$l_logdir"
  sudo touch "$l_log"
  sudo chown "$l_idn:$l_idg" "$l_log"
  echo "$l_log"
}
# start Conference Corpus
start_conference_corpus() {
  # the start command doubles as the pgrep/pkill pattern
  start_python_service cc https://github.com/WolfgangFahl/ConferenceCorpus "ccServer --serve" "ccServer --serve"
}
# main: work from the home directory with the user's profile loaded
# fix: quote $HOME and guard the cd so .profile is not sourced
# from an unexpected directory
cd "$HOME" || exit 1
. .profile
verbose=true
if [ $# -lt 1 ]
then
  # no arguments - show the usage message
  usage
else
  # process all options in order
  while [ "$1" != "" ]
  do
    option="$1"
    case $option in
      "-h"|"--help")
        usage
        ;;
      "--cc")
        start_conference_corpus
        ;;
      "--all")
        # currently the only service - more services would be added here
        start_conference_corpus
        ;;
    esac
    shift
  done
fi
Usage: startup [options]
Options:
-h |--help: Show this message
--cc: Start Conference Corpus
--all: Start all services