diff --git a/.github/workflows/huggingface.yml b/.github/workflows/huggingface.yml index b070979..eb2c48e 100644 --- a/.github/workflows/huggingface.yml +++ b/.github/workflows/huggingface.yml @@ -1,4 +1,4 @@ -name: Publish on Hugging Face Hub +name: Sync with Hugging Face Space on: push: branches: @@ -7,12 +7,12 @@ jobs: build: runs-on: ubuntu-latest steps: - - name: Sync with Hugging Face - uses: nateraw/huggingface-sync-action@v0.0.5 + - name: Sync with HF + uses: alex-bene/huggingface-space-sync-action@v0.1 with: - github_repo_id: iscc/iscc-sct - huggingface_repo_id: iscc/iscc-sct - repo_type: space - space_sdk: gradio - private: false + github_repo_id: 'iscc/iscc-sct' + github_branch: 'huggingface' + yaml_header_path: 'space.yml' + huggingface_repo_id: 'iscc/iscc-sct' + hf_username: 'titusz' hf_token: ${{ secrets.HF_TOKEN }} diff --git a/README.md b/README.md index 007c95c..8abfc40 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,3 @@ ---- -title: ISCC-LAB - Semantic-Code Text -emoji: ▶️ -colorFrom: red -colorTo: blue -sdk: gradio -sdk_version: 4.41.0 -pinned: true -license: CC-BY-NC-SA-4.0 -short_description: Cross Lingual Similarity Preserving Text Simprints ---- - # ISCC - Semantic Text-Code [![Tests](https://github.com/iscc/iscc-sct/actions/workflows/tests.yml/badge.svg)](https://github.com/iscc/iscc-core/actions/workflows/tests.yml) @@ -188,9 +176,5 @@ simprints based on larger chunks of text. ## Acknowledgements - Text Chunking: [text-splitter](https://github.com/benbrandt/text-splitter) -- Text Embedding: - [Sentence-Transformer](https://www.sbert.net/docs/sentence_transformer/pretrained_models.html#original-models) - -## License - -This project is licensed under the CC-BY-NC-SA-4.0 International License. 
"""Developer utilities for repository maintenance tasks."""

import pathlib


HERE = pathlib.Path(__file__).parent.absolute()

# Directories holding VCS metadata or third-party/generated files. Their contents
# are not project sources and must not be rewritten by maintenance tasks.
_SKIP_DIRS = frozenset({".git", ".hg", ".svn", ".venv", "venv", "node_modules", "__pycache__"})


def convert_lf(root=None):  # pragma: no cover
    """Convert CRLF line endings to LF in text files below a directory tree.

    Every regular file under ``root`` whose suffix is one of ``.py``, ``.toml``,
    ``.lock``, ``.txt``, ``.yml``, ``.sh`` or ``.md`` is read as bytes. Files that
    contain ``\\r\\n`` are rewritten in place with ``\\n``; all other files are left
    untouched. A summary line with the number of rewritten files is printed.

    :param root: Directory to process. Defaults to the parent directory of the
        package that contains this module, which keeps the zero-argument call
        used by the ``convert-lf`` task working unchanged.
    :return: None
    """
    crlf = b"\r\n"
    lf = b"\n"
    extensions = {".py", ".toml", ".lock", ".txt", ".yml", ".sh", ".md"}
    base = HERE.parent if root is None else pathlib.Path(root)
    n = 0
    for fp in base.glob("**/*"):
        if fp.suffix not in extensions:
            continue
        # Only directory components below ``base`` are checked, so the location
        # of ``base`` itself never causes the whole tree to be skipped.
        if _SKIP_DIRS.intersection(fp.relative_to(base).parts[:-1]):
            continue
        # ``glob`` also yields directories; one named e.g. ``notes.md`` would
        # make a plain open() raise and abort the whole run.
        if not fp.is_file():
            continue
        content = fp.read_bytes()
        if crlf not in content:
            continue
        fp.write_bytes(content.replace(crlf, lf))
        n += 1
    print(f"{n} files converted to LF")
b/space.yml @@ -0,0 +1,9 @@ +title: ISCC-LAB - Semantic-Code Text +emoji: ▶️ +colorFrom: red +colorTo: blue +sdk: gradio +sdk_version: 4.41.0 +pinned: true +license: CC-BY-NC-SA-4.0 +short_description: Cross Lingual Similarity Preserving Text Simprints