diff --git a/docs/tev2/23.2 b/docs/tev2/23.2 new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/tev2/saf.yaml b/docs/tev2/saf.yaml index 426bb78a2f..add9dbe3f3 100644 --- a/docs/tev2/saf.yaml +++ b/docs/tev2/saf.yaml @@ -10,11 +10,12 @@ scope: scopedir: https://github.com/tno-terminology-design/tev2-specifications/tree/master/docs/tev2 # URL of the scope-directory curatedir: terms # directory where all curated files are located. Full URL is `scopedir`/`curatedir` glossarydir: glossaries # directory where all glossary files and GDFs are located. Full URL is `scopedir`/`glossarydir` + website: https://tno-terminology-design.github.io/tev2-specifications/docs/tev2 # base URL for creating links to rendered versions of Curated Texts. It should also serve as the home page of the Terminology. + navpath: /terms # Path to the directory where Curated Texts are rendered. What `curatedir` is for Curated Texts, `navpath` is for their rendered versions. defaultvsn: latest # vsntag that identifies the default terminology. MRG is located at `scopedir`/`glossarydir`/mrg.tev2.latest.yaml license: LICENSE.md # file that contains the (default) licensing conditions. Full URL is `scopedir`/`license` statuses: [ proposed, approved, deprecated ] # list of status tags that are defined for terminological artifacts in this scope issues: https://github.com/tno-terminology-design/tev2-specifications/issues # URL where issues can be raised and handled - website: https://tno-terminology-design.github.io/tev2-specifications/docs/tev2 # base URL for creating links to rendered versions of Curated Texts. It should also serve as the home page of the Tterminology. 
import os
import pandas as pd
import shutil

# Skeleton for a term file that does not exist yet. The placeholders are
# filled in with str.format() by process_excel_row().
# NOTE(review): the reviewed view shows plain line breaks after
# "construction." and "processed."; if the committed template carried HTML
# line-break tags there, restore them.
_NEW_TERM_TEMPLATE = """---
# Docusaurus header
id: {termid}
# TEv2 Curated Text Header
term: {termid}
termType: concept
isa:
glossaryTerm: {original}
glossaryText: "glossary-text for '{original}'."
hoverText: "hover-text for '{original}'."
synonymOf: {synonymOf}
grouptags:
formPhrases: {formphrase}
# Curation status
status: proposed
created: {today}
updated: {today}
# Origins/Acknowledgements
contributors: RieksJ
attribution: "[TNO Terminology Design](https://tno-terminology-design.github.io/tev2-specifications/docs/tev2)"
originalLicense: "[CC BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/?ref=chooser-v1)"
---

# {original}

:::caution
The entire section on Terminology Engine v 2 (TEv2) is still under construction.
As TEv2 is not (yet) available, the texts that specify the tool are still 'raw', i.e. not yet processed.
[readers](@) will need to see through some (currently unprocessed) notational conventions.
:::

### Summary

:::info Editor's Note
This file has been automatically created; it's header needs to be revised and its contents needs to be properly written.
:::
"""


def _cell_text(value):
    """Return a spreadsheet cell as stripped text; missing cells become ''."""
    # str(nan) is the text 'nan', so missing values must be caught first.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return ''
    # A numeric cell such as 3.0 should read as '3', not '3.0'.
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value).strip()


def _merge_grouptags(header, grouptags):
    """Append the grouptags missing from the `grouptags:` line of `header`."""
    wanted = [tag.strip() for tag in grouptags.split(',') if tag.strip()]
    if not wanted:
        return
    prefix = 'grouptags:'
    index = next(
        (i for i, line in enumerate(header) if line.startswith(prefix)), None
    )
    current = []
    if index is not None:
        current = [
            tag.strip()
            for tag in header[index][len(prefix):].split(',')
            if tag.strip()
        ]
    merged = list(current)
    for tag in wanted:
        if tag not in merged:
            merged.append(tag)
    if merged == current:
        return  # nothing to add; leave the existing line untouched
    new_line = f"{prefix} {', '.join(merged)}"
    if index is None:
        header.append(new_line)
    else:
        header[index] = new_line


def _update_frontmatter(content, formphrase, grouptags):
    """Return `content` with its `formPhrases`/`grouptags` lines updated.

    Text without a leading `---` ... `---` frontmatter is returned unchanged.
    """
    lines = content.split('\n')
    fences = [i for i, line in enumerate(lines) if line.strip() == '---']
    # The opening fence must be the first non-blank line; otherwise the
    # '---' lines are horizontal rules in the body, not frontmatter.
    if len(fences) < 2 or any(line.strip() for line in lines[:fences[0]]):
        return content
    start, end = fences[0] + 1, fences[1]
    header = lines[start:end]

    if formphrase.strip():
        new_line = f'formPhrases: {formphrase}'
        for i, line in enumerate(header):
            if line.startswith('formPhrases:'):
                header[i] = new_line  # replace in place, never duplicate
                break
        else:
            header.append(new_line)

    if grouptags.strip():
        _merge_grouptags(header, grouptags)

    return '\n'.join(lines[:start] + header + lines[end:])


def process_excel_row(row):
    """Write `terms/_{termid}.md` for one spreadsheet row.

    If `terms/{termid}.md` exists, the output is a copy of it in which the
    `formPhrases:` frontmatter line is replaced by the row's `formphrase`
    (when non-empty) and the row's `grouptags` are appended to the
    `grouptags:` line (when not already listed). Otherwise a new term file
    is created from the template.

    Args:
        row: mapping-like object (e.g. a pandas Series) with the cells
            `original`, `termid`, `formphrase`, `synonymOf` and, optionally,
            `grouptags`. Missing or NaN cells are treated as empty text.

    Rows without a `termid` are skipped, since no file name can be derived.
    """
    # Upper-case only the first character of each word so that acronyms such
    # as 'HRG' keep their casing (str.capitalize() would yield 'Hrg').
    original = ' '.join(
        word[:1].upper() + word[1:]
        for word in _cell_text(row.get('original')).split()
    )
    termid = _cell_text(row.get('termid'))
    formphrase = _cell_text(row.get('formphrase'))
    synonym_of = _cell_text(row.get('synonymOf'))
    grouptags = _cell_text(row.get('grouptags'))

    if not termid:
        return

    today = pd.Timestamp.today().strftime('%Y%m%d')

    # Filenames of files to process and to write
    term_file_path = os.path.join('terms', f'{termid}.md')
    new_term_file_path = os.path.join('terms', f'_{termid}.md')

    if os.path.exists(term_file_path):
        with open(term_file_path, 'r', encoding='utf-8') as f:
            content = _update_frontmatter(f.read(), formphrase, grouptags)
    else:
        content = _NEW_TERM_TEMPLATE.format(
            termid=termid,
            original=original,
            synonymOf=synonym_of,
            formphrase=formphrase,
            today=today,
        )

    with open(new_term_file_path, 'w', encoding='utf-8') as f:
        f.write(content)


def main():
    """Process every data row of `unique_fields.xlsx` in the current directory."""
    excel_file = 'unique_fields.xlsx'
    # dtype=str keeps cells as text instead of floats; empty cells stay NaN
    # and are mapped to '' inside process_excel_row().
    df = pd.read_excel(excel_file, dtype=str)

    # read_excel() already consumed the header row, so every remaining row
    # is data and none may be skipped.
    for _, row in df.iterrows():
        process_excel_row(row)


if __name__ == '__main__':
    main()
template,ctext-template,ctext-template,ctext-template{ss},, -curate,curate,curate,curate,, -curated,curated,curate,curated,, -curates,curates,curate,curates,, -curating,curating,curate,curating,, -Curation,curation,curate,curation,, -curation,curation,curate,curation,, -curator,curator,curator,curator{ss},, -define,define,define,define,, -defining,defining,define,defining,, -entity,entity,entity,entit{yies},, -formatted texts,formatted-text,formatted-text,formatted-text{ss},, -formphrase,formphrase,formphrase,formphrase{ss},, -form phrases,form-phrases,formphrase,form-phrase{ss},, -governance,governance,governance,,essif-lab, -header,header,header,header{ss},, -HRD,hrd,hrd,hrd{ss},, -HRDTs,hrdts,hrdt,hrdt{ss},, -HRG,hrg,hrg,hrg{ss},, -HRG entries,hrg-entries,hrg-entry,hrg-entr{yies},, -HRGT,hrgt,hrgt,hrgt{ss},, -Human Readable Dictionary,human-readable-dictionary,hrd,human-readable-dictionar{yies},, -human readable glossary,human-readable-glossary,hrg,human-readable-glossar{yies},, -ICT,ict,ict,ict{ss},, -Integrity Checker Tool,integrity-checker-tool,ict,integrity-checker-tool{ss},, -identity pattern,identity-pattern,pattern-identity,identity-model,, -ingested,ingested,ingestion,"ingest, ingests, ingested, ingesting, ingestion",, -ingestion process,ingestion-process,ingestion-process,,, -interpreters,interpreters,,,, -knowledge,knowledge,knowledge,knowledge{ss},, -machine readable dictionary (MRD),machine-readable-dictionary-mrd,mrd,machine-readable-dictionar{yies},, -machine readable dictionary (MRD),machine-readable-dictionary-mrd,mrd,machine-readable-dictionary-mrd,, -machine readable glossary,machine-readable-glossary,mrg,machine-readable-glossar{yies},, -management,management,,,, -manages,manages,,,, -members,members,,,, -model,model,,,, -MRD,mrd,mrd,mrd{ss},, -MRDT,mrdt,mrdt,mrdt{ss},, -MRG,mrg,,mrg{ss},, -MRGT,mrgt,mrgt,mrgt{ss},, -MRG Importer,mrg-importer,mrg-importer,mrg-importer{ss},, -name or phrase,name-or-phrase,term,name{ss}-or-phrase{ss},, 
-objectives,objectives,objective,objective{ss},essif-lab, -own,own,,,essif-lab, -owned,owned,,,essif-lab, -owner,owner,,,essif-lab, -owning,owning,,,essif-lab, -owns,owns,,,essif-lab, -partial identities,partial-identities,,,essif-lab, -partial identity,partial-identity,,,essif-lab, -Parties,parties,party,,essif-lab, -parties,parties,,,essif-lab, -party,party,,,essif-lab, -party-actor-action model,party-actor-action-model,,,essif-lab, -party's,party-s,,,essif-lab, -pattern,pattern,,,essif-lab, -pattern-governance-and-management,pattern-governance-and-management,,,essif-lab, -pattern-jurisdiction,pattern-jurisdiction,,,essif-lab, -pattern-party-actor-action,pattern-party-actor-action,,,essif-lab, -patterns,patterns,,,essif-lab, -pattern-terminology,pattern-terminology,,,, -reader,reader,,,, -readers,readers,,,, -reader's,reader-s,,,, -ref text,ref-text,,,, -relation,relation,,,, -relations,relations,,,, -SAF,saf,,,, -saf,saf,,,, -SAFs,safs,saf,,, -scoped,scoped,,,, -Self-Sovereign Terminology,self-sovereign-terminology,,,, -subject,subject,,,, -Term,term,,,, -term,term,,,, -term identifier,term-identifier,term-identifier{ss},,, -terminological artifacts,terminological-artifacts,,,, -Terminology Corpus,terminology-corpus,corpus,,, -terminology process,terminology-process,,,, -terminology under construction,terminology-under-construction,,,, -term name,term-name,,,, -term-name,term-name,,,, -term selection criteria,term-selection-criteria,,,, -term type,term-type,,,, -term-type,term-type,,,, -TEv2 toolbox,tev2-toolbox,,,, -things,things,,,, -tools,tools,,,, -transformer,transformer,,,, -TRRT,trrt,,,, -trrt,trrt,,,, -TRRT's,trrt-s,trrt,,, -use-case,use-case,,,, -use cases,use-cases,,,, -use-cases,use-cases,,,, -user,user,,,, -users,users,,,, diff --git a/docs/tev2/unique_fields.xlsx b/docs/tev2/unique_fields.xlsx new file mode 100644 index 0000000000..f9efed4bd7 Binary files /dev/null and b/docs/tev2/unique_fields.xlsx differ diff --git a/docs/tev2/unique_py_specs.txt 
b/docs/tev2/unique_py_specs.txt new file mode 100644 index 0000000000..b40f523d5d --- /dev/null +++ b/docs/tev2/unique_py_specs.txt @@ -0,0 +1,56 @@ +I want you to help me write a python script, called `unique.py`. +The directory from which that script is called contains a file `unique_fields.xlsx` (excel format) with five columns. The header row specifies the names: the first column is called `original`, the third column `termid`, the fourth `formphrase`, and the fifth `synonymOf`. +In that directory, there is a subdirectory `terms` that contains markdown files with some yaml frontmatter, and a markdown body. I use `terms directory` to refer to that directory. I use `term file` to refer to a markdown file therein. + +The script should process every line of the excel file (ignoring the first line, as it is the header line). In the instructions that follow, I use +- `{today}` to refer to the text in `yyyymmdd` format that represents today's date which I expect you to compute; +- `{NAME}` as the name of a variable that holds the contents of the cell in the column whose header is `NAME` (for example, `{termid}` would be the variable that holds the contents of the cell in the row that is processed that is in the column named `termid`). +- `{original}` is the name of a variable that holds the contents of the cell in the column named `original`, where its contents have been modified by capitalizing the first character of every word therein. + +When reading cells from the excel file, make sure that variables end up being proper texts, so: +- cells must be read to produce texts, not floating point numbers or whatever; +- cells that contain errors, are empty, void, null, NaN or similar, are considered to contain an empty string. + +Processing of a line of the excel file is as follows. First, check if there is a term file (in the terms directory) whose name is `{termid}.md`, and create a new file called `_{termid}.md` (in the same directory), to which the end-result of the processing will be written. 
Then, +1. if there is such a `{termid}.md` file, then the result of the processing will be a copy of its contents, with the following modifications: + - if `{formphrase}` contains non-whitespace characters, the line in the frontmatter that starts with `formPhrases` must be replaced with a line that only contains the text `formPhrases: {formphrase}`. + - if `{grouptags}` contains non-whitespace characters, its contents must be considered as a comma-separated list of words (each of which is a 'grouptag'). The line in the frontmatter that starts with `grouptags:` and that is followed by a comma-separated list of words must then be modified such that every grouptag that is not a word therein is appended to the list of words, such that the list of words remains a comma-separated word list. +2. if there is no such file, the result is the creation of a new one using the text I specify below, which is all text (including the comments) between the two occurrences of `~~~`, in which every occurrence of a variable name (e.g., `{termid}`) is replaced with its value: + +~~~ +--- +# Docusaurus header +id: {termid} +# TEv2 Curated Text Header +term: {termid} +termType: concept +isa: +glossaryTerm: {original} +glossaryText: "glossary-text for `{original}`." +hoverText: "hover-text for `{original}`." +synonymOf: {synonymOf} +grouptags: +formPhrases: {formphrase} +# Curation status +status: proposed +created: {today} +updated: {today} +# Origins/Acknowledgements +contributors: RieksJ +attribution: "[TNO Terminology Design](https://tno-terminology-design.github.io/tev2-specifications/docs/tev2)" +originalLicense: "[CC BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/?ref=chooser-v1)" +--- + +# {original} + +:::caution +The entire section on Terminology Engine v 2 (TEv2) is still under construction.
+As TEv2 is not (yet) available, the texts that specify the tool are still 'raw', i.e. not yet processed.
[readers](@) will need to see through some (currently unprocessed) notational conventions. +::: + +### Summary + +:::info Editor's Note +This file has been automatically created; it's header needs to be revised and its contents needs to be properly written. +::: +~~~ \ No newline at end of file