Compare commits
5 commits
fb1f3af984
...
10420e0f99
Author | SHA1 | Date | |
---|---|---|---|
10420e0f99 | |||
f0dd01f79a | |||
b97d025ee7 | |||
ab5791e738 | |||
3be8b564cb |
13
G/G
13
G/G
|
@ -99,6 +99,7 @@ B<grep>
|
|||
my $i = 0;
|
||||
my @add_X;
|
||||
|
||||
# Make groups of grep options: -v foo -v -i bar => [-v foo] [-v -i bar]
|
||||
for(@ARGV) {
|
||||
if($_ eq "-g") {
|
||||
# -g = recursive-and file grep
|
||||
|
@ -122,6 +123,7 @@ for(@ARGV) {
|
|||
}
|
||||
|
||||
if($opt::g and @cmd) {
|
||||
# -g => search files
|
||||
sub gitdir {
|
||||
# Find .git dir somewhere in parent
|
||||
my $dir = shift;
|
||||
|
@ -140,23 +142,28 @@ if($opt::g and @cmd) {
|
|||
`find "$dir" -type f -print0 | xargs -0 cat >/dev/null`;
|
||||
}
|
||||
my $a = shift @cmd;
|
||||
# -v => Use -L instead of -l
|
||||
my $l_or_L = (grep /^-v$/, @$a) ? "L" : "l";
|
||||
@$a = (grep { not /^-v$/ } @$a);
|
||||
|
||||
my $gitdir = gitdir(".");
|
||||
if($gitdir) {
|
||||
cache_gitdir($gitdir);
|
||||
$run = 'git grep --threads 30 -l '.shell_quote(@$a);
|
||||
$run = "git grep --threads 30 -$l_or_L ".shell_quote(@$a);
|
||||
} else {
|
||||
$run = 'find . -type f | parallel --lb -Xq grep -l '.shell_quote(@$a);
|
||||
$run = "find . -type f | parallel --lb -Xq grep -$l_or_L ".shell_quote(@$a);
|
||||
}
|
||||
if(@cmd) {
|
||||
$run .= '|' .
|
||||
join"|", map { 'xargs -d"\n" grep -l '.
|
||||
join"|", map { 'xargs -d"\n" grep -'.$l_or_L.' '.
|
||||
join(" ", shell_quote(@$_)) } @cmd;
|
||||
}
|
||||
exec $run;
|
||||
} elsif(@cmd) {
|
||||
# => search stdin
|
||||
exec join"|", map { "grep ".join(" ", shell_quote(@$_)) } @cmd;
|
||||
} else {
|
||||
# no options => cat
|
||||
exec 'cat';
|
||||
}
|
||||
|
||||
|
|
18
Makefile
18
Makefile
|
@ -1,18 +1,20 @@
|
|||
CMD = 2grep 2search audioping blink burncpu bwlimit clipboard drac \
|
||||
duplicate-packets em emoticons encdir fanspeed field \
|
||||
find-first-fail find-optimal forever fxkill G gitnext gitundo \
|
||||
goodpasswd histogram Loffice mtrr mirrorpdf neno not off \
|
||||
pdfman pidcmd pidtree plotpipe puniq ramusage rand rclean \
|
||||
rina rn rrm seekmaniac shython sound-reload splitvideo stdout \
|
||||
swapout T teetime timestamp tracefile transpose upsidedown \
|
||||
vid w4it-for-port-open whitehash wifi-reload wssh \
|
||||
youtube-lbry ytv yyyymmdd
|
||||
find-first-fail find-optimal forever ft-udvalg fxkill G \
|
||||
gitdiffdir gitedit gitnext gitundo goodpasswd histogram \
|
||||
Loffice mtrr mirrorpdf neno not off pdfman pidcmd pidtree \
|
||||
plotpipe puniq ramusage rand rclean rina rn rrm seekmaniac \
|
||||
shython sound-reload splitvideo stdout swapout T teetime \
|
||||
timestamp tracefile transpose upsidedown vid \
|
||||
w4it-for-port-open whitehash wifi-reload wssh youtube-lbry \
|
||||
ytv yyyymmdd
|
||||
|
||||
all: 2search/2grep.1 2search/2search.1 blink/blink.1 \
|
||||
burncpu/burncpu.1 bwlimit/bwlimit.1 clipboard/clipboard.1 \
|
||||
drac/drac.1 encdir/encdir.1 fanspeed/fanspeed.1 field/field.1 \
|
||||
find-first-fail/find-first-fail.1 find-optimal/find-optimal.1 \
|
||||
G/G.1 gitnext/gitnext.1 gitundo/gitundo.1 \
|
||||
ft-udvalg/ft-udvalg.1 G/G.1 gitdiffdir/gitdiffdir.1 \
|
||||
gitedit/gitedit.1 gitnext/gitnext.1 gitundo/gitundo.1 \
|
||||
goodpasswd/goodpasswd.1 histogram/histogram.1 \
|
||||
mirrorpdf/mirrorpdf.1 neno/neno.1 off/off.1 pdfman/pdfman.1 \
|
||||
pidcmd/pidcmd.1 pidtree/pidtree.1 plotpipe/plotpipe.1 \
|
||||
|
|
6
README
6
README
|
@ -24,8 +24,14 @@ find-first-fail - find the lowest argument that makes a command fail.
|
|||
|
||||
forever - run the same command or list of commands every second.
|
||||
|
||||
ft-udvalg - Download udvalgsmembers from folketing.dk as ODS.
|
||||
|
||||
G - shorthand for multi level grep.
|
||||
|
||||
gitdiffdir - git diff between two dirs.
|
||||
|
||||
gitedit - edit last 10 commits.
|
||||
|
||||
gitnext - checkout next revision. Opposite of 'checkout HEAD^'.
|
||||
|
||||
gitundo - undo commit.
|
||||
|
|
|
@ -52,9 +52,23 @@ Equivalent to: B<tee >>B<(xsel -i -b) | xclip -i >>B</dev/null; xsel -o -b>
|
|||
cat | clipboard | cat
|
||||
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
=over 4
|
||||
|
||||
=item B<-f>
|
||||
|
||||
Run B<clipboard> every second. Only print output when it
|
||||
changes. Similar to B<tail -f>.
|
||||
|
||||
Select the text 'END' to stop.
|
||||
|
||||
|
||||
=back
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Copyright (C) 2023 Ole Tange,
|
||||
Copyright (C) 2023-2024 Ole Tange,
|
||||
http://ole.tange.dk and Free Software Foundation, Inc.
|
||||
|
||||
|
||||
|
@ -87,6 +101,19 @@ B<tee>(1), B<xclip>(1), B<xsel>(1)
|
|||
#debug_log=/tmp/T-debug
|
||||
debug_log=/dev/null
|
||||
|
||||
if [ "-f" = "$1" ]; then
    # Follow mode: poll the clipboard once per second and print its
    # content every time it differs from the previous poll.
    # The loop ends once the clipboard content is the text END.
    until [ "$c" = "END" ]; do
        c=$(clipboard 2>/dev/null)
        # Only print when the content changed since the last poll
        [ "$c" = "$last" ] || {
            echo "$c"
            last="$c"
        }
        sleep 1
    done
    exit 0
fi
|
||||
|
||||
if tty -s ; then
|
||||
# STDIN is terminal
|
||||
# Don't care what STDOUT is
|
||||
|
|
200
ft-udvalg/ft-udvalg
Executable file
200
ft-udvalg/ft-udvalg
Executable file
|
@ -0,0 +1,200 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
"""
|
||||
=pod
|
||||
|
||||
=encoding UTF-8
|
||||
|
||||
=head1 NAME
|
||||
|
||||
ft-udvalg - Download udvalgsmembers from folketing.dk as ODS
|
||||
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<ft-udvalg>
|
||||
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
B<ft-udvalg> will walk through REU, BEU, BUU, EPI, ERU, EUU, FIU, FOU,
|
||||
FÆU, GRU, BOU, IFU, KIU, KEF, KUU, LIU, MOF, SAU, SOU, SUU, TRU, UFO,
|
||||
URU, UUI, ULØ, and UVP, select all the members, add their email
|
||||
addresses, and put it in an ODS-file that is easy to use with Auto
|
||||
Filter.
|
||||
|
||||
ft.dk requires your IP address to be from Denmark. Otherwise you will
|
||||
be blocked by CloudFlare.
|
||||
|
||||
=head1 EXAMPLE
|
||||
|
||||
Generate ft-udvalgsmedlemmer.ods:
|
||||
|
||||
ft-udvalg
|
||||
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Copyright (C) 2024 Ole Tange,
|
||||
http://ole.tange.dk and Free Software Foundation, Inc.
|
||||
|
||||
|
||||
=head1 LICENSE
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
at your option any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
=head1 DEPENDENCIES
|
||||
|
||||
B<ft-udvalg> uses B<python3> and the Python modules B<requests>, B<requests_cache>, B<bs4>, B<pandas>, and B<PyPDF2>.
|
||||
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
B<python3>
|
||||
|
||||
=cut
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
import requests
|
||||
import requests_cache
|
||||
from bs4 import BeautifulSoup
|
||||
import pandas as pd
|
||||
import PyPDF2
|
||||
import re
|
||||
|
||||
# Log at DEBUG level so every fetch, and whether it came from the cache,
# is reported.
logging.basicConfig(level=logging.DEBUG)

# Cache all HTTP responses in an SQLite-backed cache below ~/.cache
cache_dir = os.path.expanduser("~/.cache/ft-udvalg")
requests_cache.install_cache(
    cache_name=cache_dir,
    backend='sqlite',
    expire_after=86400,  # Cache expires after 1 day
)

# Site root that the committee URLs are built from
base_url = "https://www.ft.dk"

# Committee codes as they appear in the URL path
udvalg = [
    "reu", "beu", "buu", "epi", "eru", "euu", "fiu",
    "fou", "fæu", "gru", "bou", "ifu", "kiu", "kef",
    "kuu", "liu", "mof", "sau", "sou", "suu", "tru",
    "ufo", "uru", "uui", "ulø", "uvp",
]
|
||||
|
||||
# Step 1: Extract member links from the provided URL
def extract_members(udvalg_url):
    """Return the biography-page links of the members listed on a committee page.

    Args:
        udvalg_url: URL of a committee's member overview page.

    Returns:
        A list of ``{"biopage": <absolute URL>}`` dicts, one for each table
        cell with ``data-title="Navn"`` that contains a link, in page order.
    """
    # Local import so this function does not depend on the file's
    # top-level import block.
    from urllib.parse import urljoin

    response = requests.get(udvalg_url)
    logging.debug(f"Fetching {udvalg_url}, from cache: {response.from_cache}")
    soup = BeautifulSoup(response.text, 'html.parser')
    members = []

    for td_tag in soup.find_all('td', {'data-title': 'Navn'}):
        a_tag = td_tag.find('a', href=True)
        if a_tag:
            # urljoin leaves absolute links untouched and resolves relative
            # ones against the site root. Plain string concatenation mangled
            # every absolute link that was not below /medlemmer/mf/.
            members.append({"biopage": urljoin(base_url, a_tag['href'])})
    return members
|
||||
|
||||
# Step 2: Extract the name and PDF URL for each member
def extract_pdf_url(member_url):
    """Fetch a member's biography page and return name, party and CV link.

    Args:
        member_url: URL of the member's biography page.

    Returns:
        A dict ``{'Navn': name, 'Parti': party, 'CV': pdf_url}``. ``pdf_url``
        is an absolute URL, or None when the page has no download button
        whose text contains "CV".

    Raises:
        ValueError: If the page has no biography title, or the title is not
            of the form ``Name (Party)``.
    """
    # Local import so this function does not depend on the file's
    # top-level import block.
    from urllib.parse import urljoin

    response = requests.get(member_url)
    logging.debug(f"Fetching {member_url}, from cache: {response.from_cache}")
    soup = BeautifulSoup(response.text, 'html.parser')

    # A missing title used to surface as an AttributeError on None;
    # report which page is the problem instead.
    title_tag = soup.find('h1', class_='biography-page-title')
    if title_tag is None:
        raise ValueError(f"No biography title found on {member_url}")

    # The title has the form "Name (Party)"
    match = re.match(r'^(.*)\s\((.*)\)$', title_tag.text.strip())
    if not match:
        raise ValueError("Text format does not match 'Name (Party)'")
    name, party = match.groups()

    # First download button whose label mentions "CV"
    pdf_url = next((button['href'] for button in soup.select('a.download__container__docBtns__btn') if "CV" in button.get_text()), None)

    if pdf_url:
        # urljoin keeps absolute links (also on other hosts) as they are
        # and resolves relative ones against the site root.
        pdf_url = urljoin(base_url, pdf_url)

    return {'Navn': name, 'Parti': party, 'CV': pdf_url}
|
||||
|
||||
# Step 3: Extract email from the PDF
def extract_email_from_pdf(member):
    """Download a member's CV PDF and return the first email address in it.

    Args:
        member: Dict with the key "CV" (URL of the PDF, or None) and the
            key "Navn" (only used in error messages).

    Returns:
        The address that follows an "E-mail:" style label on the first page
        containing one. None if there is no CV, no address is found, or the
        download or parsing fails (the failure is logged).
    """
    # Local import so this function does not depend on the file's
    # top-level import block.
    import io

    pdf_url = member["CV"]
    if not pdf_url:
        return None

    try:
        response = requests.get(pdf_url)
        logging.debug(f"Fetching {pdf_url}, from cache: {response.from_cache}")

        # Parse straight from memory: no temp.pdf is left in the current
        # dir, and there is nothing to clean up if the download fails.
        # PdfReader/.pages replace PdfFileReader/numPages/getPage, which
        # newer PyPDF2 releases no longer provide.
        reader = PyPDF2.PdfReader(io.BytesIO(response.content))

        for page in reader.pages:
            # Replace \xad with -
            text = page.extract_text().replace('\xad', '-')
            email_match = re.search(r'E[^a-z]*mail:\s*([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})', text)
            if email_match:
                return email_match.group(1)

        return None

    except PyPDF2.errors.PdfReadError as e:
        logging.error(f"Failed to read PDF for member {member['Navn']} with URL {pdf_url}: {e}")
        return None
    except Exception as e:
        # Best effort: one broken CV must not stop the whole run
        logging.error(f"An error occurred while processing member {member['Navn']} with URL {pdf_url}: {e}")
        return None
|
||||
|
||||
# Fetch the member overview of every committee, keyed by committee code
udv_members = {}
for udv in udvalg:
    udv_members[udv] = extract_members(
        f"{base_url}/da/udvalg/udvalgene/{udv}/medlemsoversigt"
    )

# Merge the per-committee lists into one record per biography page.
# Each record gets a column named after the committee (upper case) set
# to 1 for every committee the member sits on.
members = {}
for code, roster in udv_members.items():
    column = code.upper()
    for entry in roster:
        page = entry["biopage"]
        members.setdefault(page, {"biopage": page})[column] = 1

# Add name, party, CV link and email to every unique member
for record in members.values():
    record.update(extract_pdf_url(record["biopage"]))
    record['Email'] = extract_email_from_pdf(record) if record["CV"] else None

# One row per member
members_list = list(members.values())

# Fixed columns first, then one column per committee in sorted order
sorted_udvalg = sorted(udvalg)
columns_order = ['Navn', 'Parti', 'Email', 'biopage', 'CV']
columns_order += [code.upper() for code in sorted_udvalg]

# Step 4: Save the extracted data to an ODS file
df = pd.DataFrame(members_list).reindex(columns=columns_order)
df.to_excel('ft-udvalgsmedlemmer.ods', index=False)

print("Data has been successfully saved to ft-udvalgsmedlemmer.ods")
|
117
gitdiffdir/gitdiffdir
Executable file
117
gitdiffdir/gitdiffdir
Executable file
|
@ -0,0 +1,117 @@
|
|||
#!/bin/bash
|
||||
|
||||
: <<=cut
|
||||
=pod
|
||||
|
||||
=head1 NAME
|
||||
|
||||
gitdiffdir - git diff, but between two dirs in different repositories
|
||||
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<gitdiffdir> [I<diff options>] I<dir1> I<dir2>
|
||||
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
B<git diff> only looks at tracked files. B<git diff> I<dir1 dir2>
|
||||
includes all untracked files.
|
||||
|
||||
B<gitdiffdir> tries to simulate B<git diff> by only looking at tracked
|
||||
files but on two dirs in different repositories.
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
B<gitdiffdir> passes options to B<diff>. Default: B<-Naur>
|
||||
|
||||
=head1 EXAMPLES
|
||||
|
||||
Diff dirs with the same name:
|
||||
|
||||
gitdiffdir myproject/mydir1 myoldproject/mydir1
|
||||
|
||||
Diff dirs with different names:
|
||||
|
||||
gitdiffdir myproject/mydir1 myoldproject/mydir2
|
||||
|
||||
Ignore new files:
|
||||
|
||||
gitdiffdir -aur myproject/mydir1 myoldproject/mydir2
|
||||
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Copyright (C) 2024 Ole Tange,
|
||||
http://ole.tange.dk and Free Software Foundation, Inc.
|
||||
|
||||
|
||||
=head1 LICENSE
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
at your option any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
=head1 DEPENDENCIES
|
||||
|
||||
B<gitdiffdir> uses B<git>, B<parallel>, B<sort>, B<diff>, and B<less>.
|
||||
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
B<git>
|
||||
|
||||
|
||||
=cut
|
||||
|
||||
# Split the command line: every argument starting with '-' is passed on
# to diff, every other argument is taken as a directory.
diff_options=()
others=()

# Loop through all arguments
while [[ $# -gt 0 ]]; do
    case $1 in
        -*) diff_options+=("$1") ;;  # Add to options array if it starts with -
        *) others+=("$1") ;;         # Add to others array if it doesn't start with -
    esac
    shift  # Shift to the next argument
done
if [ ${#diff_options[@]} -eq 0 ]; then
    # Default: diff -Naur
    diff_options+=("-Naur")
fi

# Exactly two dirs are needed. Without this check the commands below
# would be run on empty dir names.
if [ ${#others[@]} -ne 2 ]; then
    echo "Usage: gitdiffdir [diff options] dir1 dir2" >&2
    exit 1
fi

dirA="${others[0]}"
dirB="${others[1]}"

# Remove all trailing slashes, but keep a lone "/" so the root dir does
# not turn into an empty string
while [[ "$dirA" == */ && "$dirA" != / ]]; do
    dirA="${dirA%/}"
done
while [[ "$dirB" == */ && "$dirB" != / ]]; do
    dirB="${dirB%/}"
done
|
||||
|
||||
# Copy stdin to stdout: through less if stdout is a terminal,
# through cat otherwise.
less_or_cat() {
    local viewer=cat
    # STDOUT = terminal => use less
    [ -t 1 ] && viewer=less
    "$viewer"
}
|
||||
|
||||
# List the tracked files of both dirs, merge the two lists, and diff
# every file pair. The names are passed NUL-terminated all the way:
# without -z git quotes unusual filenames (e.g. non-ASCII ones), and
# diff would be given names that do not exist.
parallel -0 'cd {} && git ls-files -z' ::: "$dirA" "$dirB" | sort -zu |
    parallel -0 -q diff "${diff_options[@]}" "$dirA"/{} "$dirB"/{} | less_or_cat
|
|
@ -27,7 +27,7 @@ Go to next revision
|
|||
|
||||
=head1 AUTHOR
|
||||
|
||||
Copyright (C) 2017 Ole Tange,
|
||||
Copyright (C) 2017-2024 Ole Tange,
|
||||
http://ole.tange.dk and Free Software Foundation, Inc.
|
||||
|
||||
|
||||
|
@ -51,7 +51,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||
|
||||
=head1 DEPENDENCIES
|
||||
|
||||
B<gitnext> uses B<git>.
|
||||
B<gitnext> uses B<git> and B<field>.
|
||||
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
|
44
vid/vid
44
vid/vid
|
@ -23,17 +23,49 @@ There can be multiple B<grep> expressions.
|
|||
The searching is cached in B<.vidlist> in a parent dir or in the
|
||||
current dir if no parents contain B<.vidlist>.
|
||||
|
||||
If stdin (standard input) is a tty: Match videos from B<.vidlist>
|
||||
ordered by decreasing size.
|
||||
|
||||
If stdin (standard input) is not a tty: Match videos from standard
|
||||
input.
|
||||
|
||||
If stdout (standard output) is a tty: Play 100 randomly chosen of the
|
||||
matching videos.
|
||||
|
||||
If stdout (standard output) is not a tty: List all matching videos.
|
||||
|
||||
Videos in the dir B<.waste> are ignored.
|
||||
|
||||
|
||||
=head1 EXAMPLE
|
||||
|
||||
Play videos matching B<Documentary> but not B<BBC>:
|
||||
Play 100 videos in random order matching B<Documentary> but not B<BBC>:
|
||||
|
||||
vid Documentary -v BBC
|
||||
|
||||
List all videos matching B<Documentary> but not B<BBC> ordered by
|
||||
decreasing size:
|
||||
|
||||
vid Documentary -v BBC | cat
|
||||
|
||||
Play all videos ending in B<.mp4>:
|
||||
|
||||
ls *.mp4 | vid
|
||||
|
||||
=head1 ENVIRONMENT VARIABLES
|
||||
|
||||
=over 9
|
||||
|
||||
=item $VIDEOPLAYER
|
||||
|
||||
Use this as video player. Default: vlc
|
||||
|
||||
=back
|
||||
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Copyright (C) 2018-2019 Ole Tange,
|
||||
Copyright (C) 2018-2024 Ole Tange,
|
||||
http://ole.tange.dk and Free Software Foundation, Inc.
|
||||
|
||||
|
||||
|
@ -118,6 +150,7 @@ cat_list() {
|
|||
full_path_vidlist_dir="$(dirname $(readlink -f "$vidlist") )"
|
||||
full_path_thisdir="$(readlink -f .)"
|
||||
if [ -f "$vidlist" ] ; then
|
||||
# vidlist exists: Do the update in the background
|
||||
# find background (>/dev/null to detach from tty)
|
||||
update_list "$vidlist" "$full_path_vidlist_dir" >/dev/null &
|
||||
else
|
||||
|
@ -128,7 +161,7 @@ cat_list() {
|
|||
perl -pe 's|\Q'"$full_path_vidlist_dir"'\E|.|')/"
|
||||
# cat "$vidlist" | grep matching this dir + remove dirs
|
||||
# echo "$vidlist" "$full_path_thisdir" "$full_path_vidlist_dir" = "$subdir" >&2
|
||||
cat "$vidlist" |
|
||||
grep -v '/.waste/' "$vidlist" |
|
||||
perl -ne 's|^(\./)?\Q'"$subdir"'\E|| and print'
|
||||
}
|
||||
|
||||
|
@ -147,7 +180,12 @@ stdout() {
|
|||
# STDOUT = terminal
|
||||
# start $VIDEOPLAYER
|
||||
VIDEOPLAYER=${VIDEOPLAYER:-vlc --}
|
||||
if tty -s ; then
|
||||
# STDIN is terminal => limit to 100
|
||||
shuf | parallel --halt now,done=1 --lb -n100 -Xj1 $VIDEOPLAYER
|
||||
else
|
||||
parallel --halt now,done=1 --lb -Xj1 $VIDEOPLAYER
|
||||
fi
|
||||
else
|
||||
cat
|
||||
fi
|
||||
|
|
Loading…
Reference in a new issue