Compare commits
15 Commits
| Author | SHA1 | Date |
|---|---|---|
| | eadc4080b1 | |
| | 9219e3656b | |
| | 76d67cff7a | |
| | b076b6a9cf | |
| | 2781114863 | |
| | 51a8c8b425 | |
| | 8c87cdf0b2 | |
| | c0d49d23bb | |
| | 98df3f2372 | |
| | c85813ad76 | |
| | 3774d3eba1 | |
| | e3b97814ea | |
| | 62c927d5ec | |
| | c000168dc6 | |
| | 9739a73265 | |
.gitignore (vendored), 16 lines changed
```diff
@@ -1,9 +1,9 @@
 **/*.sw*
-spike/review/reinvent/ezmded/server/ezmded
-spike/review/reinvent/ezmded/server/server
-spike/review/reinvent/ezmded/server/testdata/files/**/*
-spike/review/reinvent/ezmded/server/testdata/workd/**/*
-spike/review/reinvent/ezmded/server/testdata/media/**/*
-spike/review/reinvent/ezmded/server/testdata/index.html
-spike/review/reinvent/ezmded/ui/render
-spike/review/reinvent/ezmded/ui/**/.*.html
+server/ezmded
+server/server
+server/testdata/files/**/*
+server/testdata/workd/**/*
+server/testdata/media/**/*
+server/testdata/index.html
+ui/render
+ui/**/.*.html
```
```diff
@@ -1 +0,0 @@
-../../spike/review/run.sh
```
crawler/google.sh (new file), 76 lines
```diff
@@ -0,0 +1,76 @@
+#! /bin/bash
+
+google() (
+	_is_slides() {
+		echo "$@" | grep -q 'docs.google.com.presentation'
+	}
+
+	_is_sheets() {
+		echo "$@" | grep -q 'docs.google.com.spreadsheets'
+	}
+
+	_is_doc() {
+		echo "$@" | grep -q 'docs.google.com.document'
+	}
+
+	is() {
+		_is_sheets "$@" || _is_doc "$@" || _is_slides "$@"
+	}
+
+	human_url() {
+		echo "$1"
+	}
+
+	get() {
+		local url="$1"
+		local id="${url%/*}"
+		id="${id##*/}"
+		local downloaded="$(rclone get_google "$id")"
+		echo "# ${downloaded##*/}"
+		echo ""
+		if [ "${downloaded##*.}" == "csv" ]; then
+			_csv_to_md "$downloaded"
+		elif [ "${downloaded##*.}" == "html" ]; then
+			_html_to_md "$downloaded"
+		else
+			cat "$downloaded"
+		fi
+	}
+
+	_html_to_md() {
+		which pandoc &> /dev/null
+		local f="$1"
+		#log f=$f
+		cat "$f" \
+			| sed 's/.*<body/<body/' \
+			| sed 's/<\/body>.*/<\/body>/' \
+			| sed 's/<[\/]*span[^>]*>//g' \
+			| perl -pe 's|<div class="c[0-9][0-9]*">.*?<\/div>||g' \
+			| sed 's/<\([a-z][a-z]*\)[^>]*/<\1/g' \
+			| pandoc - -f html -t commonmark -s -o - \
+			| sed 's/^<[\/]*div>$//g'
+	}
+
+	_csv_to_md() {
+		local f="$1"
+		(
+			head -n 1 "$f"
+			head -n 1 "$f" \
+				| sed 's/^[^,][^,]*/--- /' \
+				| sed 's/[^,][^,]*$/ ---/' \
+				| sed 's/,[^,][^,]*/, --- /g' \
+				| sed 's/[^|]$/|/'
+			tail -n +2 "$f"
+		) \
+			| grep . \
+			| sed 's/,/ | /g' \
+			| sed 's/^/| /'
+	}
+
+	expand() {
+		get "$@" | head -n 1 | sed 's/^[#]* //' | base64
+	}
+
+	"$@"
+)
```
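The new `google` backend uses the same dispatcher convention as the other crawler backends: the whole file defines a single function whose body is a subshell, and the trailing `"$@"` dispatches the first argument to an inner helper such as `is`, `get`, or `expand`. A minimal usage sketch (illustrative only; the URL is a placeholder, and the helper scripts are assumed to be sourced as in the `config()` hunk further down):

```bash
#! /bin/bash
# Illustrative sketch, not part of the diff. The URL below is a placeholder.
source ./cache.sh    # cache get / cache put, used by the rclone wrapper
source ./rclone.sh   # `google get` shells out to `rclone get_google`
source ./google.sh

url='https://docs.google.com/spreadsheets/d/PLACEHOLDER_ID/edit'

if google is "$url"; then    # true for docs.google.com document/spreadsheets/presentation URLs
	google get "$url"          # downloads the export and prints it, converted to Markdown where possible
fi
```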
```diff
@@ -4,6 +4,7 @@ main() {
 	config
 	log crawling ids...
 	for id in $(crawlable_ids); do
+		log crawling id $id
 		crawl "$id"
 	done
 	log rewriting ids...
@@ -20,8 +21,12 @@ config() {
 	export CACHE_DURATION=$((60*50))
 	export NOTES_ADDR="${NOTES_ADDR:-"http://localhost:3004"}"
 	export GITLAB_PAT="$GITLAB_PAT"
+	export RCLONE_CONFIG="$RCLONE_CONFIG"
+	export RCLONE_CONFIG_PASS="$RCLONE_CONFIG_PASS"
 	source ./gitlab.sh
 	source ./gitlab_wiki.sh
+	source ./google.sh
+	source ./rclone.sh
 	source ./cache.sh
 	source ./notes.sh
 }
@@ -56,12 +61,7 @@ crawlable_ids() {
 }
 
 crawl() {
-	local cache_key="crawled $*"
-	# TODO
-	if false && cache get "$cache_key"; then
-		return
-	fi
-	_crawl "$@" | cache put "$cache_key"
+	_crawl "$@"
 }
 
 _crawl() {
@@ -73,7 +73,7 @@ _crawl() {
 		"$id"
 	)"
 	local crawlable_source="$(extract_crawlable_source "$content")"
-	for backend in gitlab gitlab_wiki; do
+	for backend in gitlab gitlab_wiki google; do
 		if $backend is "$crawlable_source"; then
 			crawl_with $backend "$json"
 			return $?
@@ -149,6 +149,7 @@ crawl_with() {
 		ID="${ID%/}"
 		log "  $ID ($TITLE): ${#CONTENT}"
 		push_crawled "$ID" "$TITLE" "$CONTENT"
+		log " /$ID ($TITLE): ${#CONTENT}"
 	}
 	if [ "${#expanded[@]}" -gt 0 ]; then
 		for i in $(seq 0 $(("${#expanded[@]}"-1))); do
@@ -166,7 +167,7 @@ push_crawled() {
 is_crawlable() {
 	local crawlable_source="$(extract_crawlable_source "$*")"
 	# https://unix.stackexchange.com/questions/181254/how-to-use-grep-and-cut-in-script-to-obtain-website-urls-from-an-html-file
-	local url_pattern="(http|https)://[a-zA-Z0-9./?=_%:-]*"
+	local url_pattern="(http|https)://[a-zA-Z0-9./?=_%:\-\#--]*"
 	echo "$crawlable_source" | cut -c 1-300 | grep -q -E "^[ ]*$url_pattern[ ]*$"
 }
```
```diff
@@ -2,11 +2,15 @@
 
 notes() (
 	ids() {
-		_recurse_ids "" "$(_tree)"
+		_recurse_ids "$(_tree)"
 	}
 
 	_tree() {
-		__tree "$@"
+		local cache_key="notes _tree"
+		if CACHE_DURATION=5 cache get "$cache_key"; then
+			return 0
+		fi
+		__tree "$@" | cache put "$cache_key"
 	}
 
 	__tree() {
@@ -18,8 +22,7 @@ notes() (
 	}
 
 	_recurse_ids() {
-		local prefix="$1"
-		local json="$2"
+		local json="$1"
 		if echo "$json" | jq .Branches | grep -q ^null$; then
 			return 0
 		fi
@@ -29,22 +32,32 @@ notes() (
 		fi
 		for line in $b64lines; do
 			line="$(echo "$line" | base64 --decode)"
-			local subfix="$(printf "%s/%s" "$prefix" "$line")"
-			subfix="${subfix#/}"
-			if ! _is_deleted "$subfix"; then
-				echo "$subfix"
+			if ! _is_deleted "$line"; then
+				echo "$line"
+				_recurse_ids "$(echo "$json" | jq -c ".Branches[\"$line\"]")"
 			fi
-			_recurse_ids "$subfix" "$(echo "$json" | jq -c ".Branches[\"$line\"]")"
 		done
 	}
 
 	meta() {
+		local cache_key="notes meta $*"
+		if CACHE_DURATION=5 cache get "$cache_key"; then
+			return 0
+		fi
+		_meta "$@" | cache put "$cache_key"
+	}
+
+	_meta() {
 		local id="$1"
 		local tree="$(_tree)"
-		for subid in ${id//\// }; do
-			tree="$(echo "$tree" | jq -c .Branches | jq -c ".[\"$subid\"]")"
+		local pid="${id%%/*}"
+		while [ "$id" != "$pid" ]; do
+			tree="$(echo "$tree" | jq ".Branches[\"$pid\"]")"
+			local to_add="${id#$pid/}"
+			to_add="${to_add%%/*}"
+			pid="$pid/$to_add"
 		done
-		echo "$tree" | jq .Leaf
+		echo "$tree" | jq ".Branches[\"$id\"].Leaf"
 	}
 
 	_is_deleted() {
@@ -90,11 +103,11 @@ notes() (
 		local id="$1"
 		local title="$2"
 		local body="$3"
-		echo "$body" | _nncurl \
+		_nncurl \
 			-X PUT \
 			-H "Title: $title" \
 			-d "$body" \
-			$NOTES_ADDR/api/v0/files/$id
+			$NOTES_ADDR/api/v0/files/$id >&2
 	}
 
 	"$@"
```
crawler/rclone.sh (new file), 62 lines
```diff
@@ -0,0 +1,62 @@
+#! /bin/bash
+
+rclone() (
+	get_google() {
+		local cache_key="rclone get google 2 $*"
+		if cache get "$cache_key"; then
+			return 0
+		fi
+		_get_google "$@" | cache put "$cache_key"
+	}
+
+	_get_google() {
+		_rate_limit
+		local id="$1"
+		local out="$(mktemp -d)"
+		_cmd backend copyid work-notes-google: --drive-export-formats=csv,html,txt "$id" "$out/"
+		find "$out" -type f
+	}
+
+	_rate_limit() {
+		local f="/tmp/rclone.rate.limit"
+		local last=0
+		if [ -f "$f" ]; then
+			last="$(date -r "$f" +%s)"
+		fi
+		local now="$(date +%s)"
+		local since_last=$((now-last))
+		if ((since_last>2)); then
+			dur=-2
+		fi
+		dur=$((dur+2))
+		sleep $dur
+		touch "$f"
+	}
+
+	_ensure() {
+		which rclone &> /dev/null && rclone version &> /dev/null
+	}
+
+	_cmd() {
+		_ensure_google_config
+		__cmd "$@"
+	}
+
+	__cmd() {
+		_ensure
+		RCLONE_CONFIG_PASS="$RCLONE_CONFIG_PASS" \
+		$(which rclone) \
+			--config "$RCLONE_CONFIG" \
+			--size-only \
+			--fast-list \
+			--retries 10 \
+			--retries-sleep 10s \
+			"$@"
+	}
+
+	_ensure_google_config() {
+		__cmd config show | grep -q work-notes-google
+	}
+
+	"$@"
+)
```
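The `rclone` wrapper is built the same way. `get_google` caches per document id, `_rate_limit` keeps invocations roughly two seconds apart by tracking a marker file's mtime, and `__cmd` calls the binary through `$(which rclone)` so the wrapper function does not recurse into itself. A rough sketch of a direct call, with a placeholder id and example values for the variables exported in `config()`:

```bash
#! /bin/bash
# Illustrative sketch, not part of the diff. PLACEHOLDER_ID and the paths are examples.
export RCLONE_CONFIG="$HOME/.config/rclone/rclone.conf"
export RCLONE_CONFIG_PASS="example-password"
source ./cache.sh
source ./rclone.sh

# Exports the document as csv/html/txt through the work-notes-google remote
# and prints the path(s) of the downloaded file(s) under a temp directory.
rclone get_google "PLACEHOLDER_ID"
```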
server/go.mod (new file), 31 lines
```diff
@@ -0,0 +1,31 @@
+module ezmded
+
+go 1.17
+
+require (
+	github.com/google/uuid v1.3.0
+	go.mongodb.org/mongo-driver v1.7.2
+	gopkg.in/yaml.v2 v2.4.0
+	local/args v0.0.0-00010101000000-000000000000
+	local/gziphttp v0.0.0-00010101000000-000000000000
+	local/router v0.0.0-00010101000000-000000000000
+	local/simpleserve v0.0.0-00010101000000-000000000000
+)
+
+require github.com/go-stack/stack v1.8.0 // indirect
+
+replace local/args => ../../../../args
+
+replace local/logb => ../../../../logb
+
+replace local/storage => ../../../../storage
+
+replace local/router => ../../../../router
+
+replace local/simpleserve => ../../../../simpleserve
+
+replace local/gziphttp => ../../../../gziphttp
+
+replace local/notes-server => ../../../../notes-server
+
+replace local/oauth2 => ../../../../oauth2
```
```diff
@@ -1,15 +1,15 @@
 todo:
+- scrape odo
+- mark generated via meta so other files in the dir can be created, deleted, replaced safely
+- rewrite links if available to local
+- table of contents
+- anchor per line
-- scrape odo
-- scrape gdoc
-- scrape gsheet
-- scrape gslide
-- anchor links work
-- rewrite links if available to local
-- ui; last updated; 2022.02.01T12:34:56
-- mark generated via meta so other files in the dir can be created, deleted, replaced safely
 done:
+- scrape gslide
+- scrape gsheet
+- scrape gdoc
 - alert box; https://concisecss.com/documentation/ui
 - hide checkbox for tree
 - do not rewrite .md title vs. link cause hrefs to ./gobs.md wont work
```
```diff
@@ -1,6 +1,7 @@
 package main
 
 import (
+	"encoding/json"
 	"io/ioutil"
 	"os"
 	"path"
@@ -53,7 +54,6 @@ func (base Leaf) Merge(updated Leaf) Leaf {
 
 type Tree struct {
 	root string
-	cachedRoot Branch
 }
 
 func NewTree(root string) Tree {
@@ -62,25 +62,72 @@ func NewTree(root string) Tree {
 
 func (tree Tree) WithRoot(root string) Tree {
 	tree.root = root
-	tree.cachedRoot = Branch{}
 	return tree
 }
 
 func (tree Tree) GetRootMeta() (Branch, error) {
-	return tree.getRoot(NewID(""), false, false)
+	if meta, ok := tree.getCachedRootMeta(); ok {
+		return meta, nil
+	}
+	got, err := tree.getRoot(NewID(""), false, false)
+	if err != nil {
+		return Branch{}, err
+	}
+	tree.cacheRootMeta(got)
+	return got, err
 }
 
 func (tree Tree) GetRoot() (Branch, error) {
-	if !tree.cachedRoot.IsZero() {
-		return tree.cachedRoot, nil
+	if root, ok := tree.getCachedRoot(); ok {
+		return root, nil
 	}
 	got, err := tree.getRoot(NewID(""), true, false)
-	if err == nil {
-		tree.cachedRoot = got
+	if err != nil {
+		return Branch{}, err
 	}
+	tree.cacheRoot(got)
 	return got, err
 }
 
+func (tree Tree) getCachedRoot() (Branch, bool) {
+	return tree.getCachedFrom("root.json")
+}
+
+func (tree Tree) getCachedRootMeta() (Branch, bool) {
+	return tree.getCachedFrom("root_meta.json")
+}
+
+func (tree Tree) getCachedFrom(name string) (Branch, bool) {
+	b, err := ioutil.ReadFile(path.Join(tree.root, name))
+	if err != nil {
+		return Branch{}, false
+	}
+	var branch Branch
+	err = json.Unmarshal(b, &branch)
+	return branch, err == nil
+}
+
+func (tree Tree) cacheRoot(branch Branch) {
+	tree.cacheRootFrom("root.json", branch)
+}
+
+func (tree Tree) cacheRootMeta(branch Branch) {
+	tree.cacheRootFrom("root_meta.json", branch)
+}
+
+func (tree Tree) cacheRootFrom(name string, branch Branch) {
+	b, err := json.Marshal(branch)
+	if err != nil {
+		return
+	}
+	ensureAndWrite(path.Join(tree.root, name), b)
+}
+
+func (tree Tree) cacheClear() {
+	os.Remove(path.Join(path.Join(tree.root, "root.json")))
+	os.Remove(path.Join(path.Join(tree.root, "root_meta.json")))
+}
+
 func (tree Tree) getRoot(pid ID, withContent, withDeleted bool) (Branch, error) {
 	m := Branch{Branches: map[ID]Branch{}}
 	entries, err := os.ReadDir(tree.root)
@@ -92,7 +139,7 @@ func (tree Tree) getRoot(pid ID, withContent, withDeleted bool) (Branch, error)
 	}
 	for _, entry := range entries {
 		if entry.Name() == "data.yaml" {
-			if b, err := ioutil.ReadFile(path.Join(tree.root, entry.Name())); err != nil {
+			if b, err := peekLeaf(withContent, path.Join(tree.root, entry.Name())); err != nil {
 				return Branch{}, err
 			} else if err := yaml.Unmarshal(b, &m.Leaf); err != nil {
 				return Branch{}, err
@@ -115,6 +162,10 @@ func (tree Tree) getRoot(pid ID, withContent, withDeleted bool) (Branch, error)
 	return m, nil
 }
 
+func peekLeaf(all bool, path string) ([]byte, error) {
+	return ioutil.ReadFile(path)
+}
+
 func (tree Tree) toDir(id ID) string {
 	return path.Dir(tree.toData(id))
 }
@@ -124,6 +175,7 @@ func (tree Tree) toData(id ID) string {
 }
 
 func (tree Tree) Put(id ID, input Leaf) error {
+	tree.cacheClear()
 	if _, err := os.Stat(tree.toData(id)); os.IsNotExist(err) {
 		b, err := yaml.Marshal(Leaf{})
 		if err != nil {
@@ -144,11 +196,11 @@ func (tree Tree) Put(id ID, input Leaf) error {
 	if err := ensureAndWrite(tree.toData(id), b); err != nil {
 		return err
 	}
-	tree.cachedRoot = Branch{}
 	return nil
 }
 
 func (tree Tree) Del(id ID) error {
+	tree.cacheClear()
 	got, err := tree.Get(id)
 	if os.IsNotExist(err) {
 		return nil
@@ -164,8 +216,8 @@ func (tree Tree) Del(id ID) error {
 }
 
 func (tree Tree) HardDel(id ID) error {
+	tree.cacheClear()
 	os.RemoveAll(tree.toDir(id))
-	tree.cachedRoot = Branch{}
 	return nil
 }
```
Binary file not shown.
```diff
@@ -1,31 +0,0 @@
-module ezmded
-
-go 1.17
-
-require (
-	github.com/google/uuid v1.3.0
-	go.mongodb.org/mongo-driver v1.7.2
-	gopkg.in/yaml.v2 v2.4.0
-	local/args v0.0.0-00010101000000-000000000000
-	local/gziphttp v0.0.0-00010101000000-000000000000
-	local/router v0.0.0-00010101000000-000000000000
-	local/simpleserve v0.0.0-00010101000000-000000000000
-)
-
-require github.com/go-stack/stack v1.8.0 // indirect
-
-replace local/args => ../../../../../../../../args
-
-replace local/logb => ../../../../../../../../logb
-
-replace local/storage => ../../../../../../../../storage
-
-replace local/router => ../../../../../../../../router
-
-replace local/simpleserve => ../../../../../../../../simpleserve
-
-replace local/gziphttp => ../../../../../../../../gziphttp
-
-replace local/notes-server => ../../../../../../../../notes-server
-
-replace local/oauth2 => ../../../../../../../../oauth2
```
```diff
@@ -113,7 +113,7 @@
 	element: document.getElementById('my-text-area'),
 	forceSync: true,
 	indentWithTabs: false,
-	initialValue: "loading...",
+	initialValue: "{{ .This.Content }}",
 	showIcons: ["code", "table"],
 	spellChecker: false,
 	sideBySideFullscreen: false,
@@ -133,6 +133,5 @@
 	},
 	status: ["lines", "words", "cursor"],
 })
-easyMDE.value({{ .This.Content }})
 </script>
 {{ end }}
```
```diff
@@ -75,6 +75,9 @@
 	.tb_fullscreen {
 		height: 100%;
 	}
+	.button, button, input[type="button"] {
+		height: auto;
+	}
 </style>
 <script>
 function http(method, remote, callback, body, headers) {
```