Compare commits

...

5 commits

Author SHA1 Message Date
Ole Tange 10420e0f99 gitdiffdir: Take diff options. 2024-08-13 22:29:07 +02:00
Ole Tange f0dd01f79a vid: Limit to 100 videos if stdin = tty. 2024-08-06 01:36:02 +02:00
Ole Tange b97d025ee7 clipboard -f implemented. 2024-08-06 01:11:05 +02:00
Ole Tange ab5791e738 gitdiffdir: git diff between two dirs. 2024-08-06 01:06:11 +02:00
Ole Tange 3be8b564cb ft-udvalg: Download udvalgsmembers from folketing.dk as ODS. 2024-07-15 16:03:54 +02:00
8 changed files with 417 additions and 20 deletions

15
G/G
View file

@ -99,6 +99,7 @@ B<grep>
my $i = 0;
my @add_X;
# Make groups of grep options: -v foo -v -i bar => [-v foo] [-v -i bar]
for(@ARGV) {
if($_ eq "-g") {
# -g = recursive-and file grep
@ -122,6 +123,7 @@ for(@ARGV) {
}
if($opt::g and @cmd) {
# -g => search files
sub gitdir {
# Find .git dir somewhere in parent
my $dir = shift;
@ -140,23 +142,28 @@ if($opt::g and @cmd) {
`find "$dir" -type f -print0 | xargs -0 cat >/dev/null`;
}
my $a = shift @cmd;
# -v => Use -L instead of -l
my $l_or_L = (grep /^-v$/, @$a) ? "L" : "l";
@$a = (grep { not /^-v$/ } @$a);
my $gitdir = gitdir(".");
if($gitdir) {
cache_gitdir($gitdir);
$run = 'git grep --threads 30 -l '.shell_quote(@$a);
$run = "git grep --threads 30 -$l_or_L ".shell_quote(@$a);
} else {
$run = 'find . -type f | parallel --lb -Xq grep -l '.shell_quote(@$a);
$run = "find . -type f | parallel --lb -Xq grep -$l_or_L ".shell_quote(@$a);
}
if(@cmd) {
$run .= '|' .
join"|", map { 'xargs -d"\n" grep -l '.
join"|", map { 'xargs -d"\n" grep -'.$l_or_L.' '.
join(" ", shell_quote(@$_)) } @cmd;
}
exec $run;
} elsif(@cmd) {
# => search stdin
exec join"|", map { "grep ".join(" ", shell_quote(@$_)) } @cmd;
} else {
# no options => cat
exec 'cat';
}

View file

@ -1,18 +1,20 @@
CMD = 2grep 2search audioping blink burncpu bwlimit clipboard drac \
CMD = 2grep 2search audioping blink burncpu bwlimit clipboard drac \
duplicate-packets em emoticons encdir fanspeed field \
find-first-fail find-optimal forever fxkill G gitnext gitundo \
goodpasswd histogram Loffice mtrr mirrorpdf neno not off \
pdfman pidcmd pidtree plotpipe puniq ramusage rand rclean \
rina rn rrm seekmaniac shython sound-reload splitvideo stdout \
swapout T teetime timestamp tracefile transpose upsidedown \
vid w4it-for-port-open whitehash wifi-reload wssh \
youtube-lbry ytv yyyymmdd
find-first-fail find-optimal forever ft-udvalg fxkill G \
gitdiffdir gitedit gitnext gitundo goodpasswd histogram \
Loffice mtrr mirrorpdf neno not off pdfman pidcmd pidtree \
plotpipe puniq ramusage rand rclean rina rn rrm seekmaniac \
shython sound-reload splitvideo stdout swapout T teetime \
timestamp tracefile transpose upsidedown vid \
w4it-for-port-open whitehash wifi-reload wssh youtube-lbry \
ytv yyyymmdd
all: 2search/2grep.1 2search/2search.1 blink/blink.1 \
burncpu/burncpu.1 bwlimit/bwlimit.1 clipboard/clipboard.1 \
drac/drac.1 encdir/encdir.1 fanspeed/fanspeed.1 field/field.1 \
find-first-fail/find-first-fail.1 find-optimal/find-optimal.1 \
G/G.1 gitnext/gitnext.1 gitundo/gitundo.1 \
ft-udvalg/ft-udvalg.1 G/G.1 gitdiffdir/gitdiffdir.1 \
gitedit/gitedit.1 gitnext/gitnext.1 gitundo/gitundo.1 \
goodpasswd/goodpasswd.1 histogram/histogram.1 \
mirrorpdf/mirrorpdf.1 neno/neno.1 off/off.1 pdfman/pdfman.1 \
pidcmd/pidcmd.1 pidtree/pidtree.1 plotpipe/plotpipe.1 \

6
README
View file

@ -24,8 +24,14 @@ find-first-fail - find the lowest argument that makes a command fail.
forever - run the same command or list of commands every second.
ft-udvalg - Download udvalgsmembers from folketing.dk as ODS.
G - shorthand for multi level grep.
gitdiffdir - git diff between two dirs.
gitedit - edit last 10 commits.
gitnext - checkout next revision. Opposite of 'checkout HEAD^'.
gitundo - undo commit.

View file

@ -52,9 +52,23 @@ Equivalent to: B<tee >>B<(xsel -i -b) | xclip -i >>B</dev/null; xsel -o -b>
cat | clipboard | cat
=head1 OPTIONS
=over 4
=item B<-f>
Run B<clipboard> every second. Only print output when it
changes. Similar to B<tail -f>.
Select the text 'END' to stop.
=back
=head1 AUTHOR
Copyright (C) 2023 Ole Tange,
Copyright (C) 2023-2024 Ole Tange,
http://ole.tange.dk and Free Software Foundation, Inc.
@ -87,6 +101,19 @@ B<tee>(1), B<xclip>(1), B<xsel>(1)
#debug_log=/tmp/T-debug
debug_log=/dev/null
# -f: follow mode. Poll the clipboard once per second and print its
# content every time it differs from what was printed last.
if [ "-f" = "$1" ]; then
# run until c = END
while [ "$c" != "END" ]; do
# Read the current content by running clipboard without -f; its stderr is discarded
c=$(clipboard 2>/dev/null)
# Print only on change; the final 'END' is therefore printed once before the loop stops
if [ "$c" != "$last" ]; then
echo "$c"
last="$c"
fi
sleep 1
done
# Follow mode never falls through to the normal copy/paste handling below
exit 0
fi
if tty -s ; then
# STDIN is terminal
# Don't care what STDOUT is

200
ft-udvalg/ft-udvalg Executable file
View file

@ -0,0 +1,200 @@
#!/usr/bin/python3
"""
=pod
=encoding UTF-8
=head1 NAME
ft-udvalg - Download udvalgsmembers from folketing.dk as ODS
=head1 SYNOPSIS
B<ft-udvalg>
=head1 DESCRIPTION
B<ft-udvalg> will walk through REU, BEU, BUU, EPI, ERU, EUU, FIU, FOU,
FÆU, GRU, BOU, IFU, KIU, KEF, KUU, LIU, MOF, SAU, SOU, SUU, TRU, UFO,
URU, UUI, ULØ, and UVP, select all the members, add their email
addresses, and put it in an ODS-file that is easy to use with Auto
Filter.
ft.dk requires your IP address to be from Denmark. Otherwise you will
be blocked by CloudFlare.
=head1 EXAMPLE
Generate ft-udvalgsmedlemmer.ods:
ft-udvalg
=head1 AUTHOR
Copyright (C) 2024 Ole Tange,
http://ole.tange.dk and Free Software Foundation, Inc.
=head1 LICENSE
Copyright (C) 2012 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
at your option any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
=head1 DEPENDENCIES
B<ft-udvalg> uses B<python3> and the Python modules B<requests>, B<requests_cache>, B<bs4> (BeautifulSoup), B<pandas>, and B<PyPDF2>.
=head1 SEE ALSO
B<python3>
=cut
"""
import os
import logging
import requests
import requests_cache
from bs4 import BeautifulSoup
import pandas as pd
import PyPDF2
import re
# Enable logging for requests-cache
# DEBUG level also makes this script's own logging.debug() messages visible.
logging.basicConfig(level=logging.DEBUG)
# Initialize the cache
# The cache uses the SQLite backend and is named after ~/.cache/ft-udvalg.
cache_dir = os.path.expanduser("~/.cache/ft-udvalg")
requests_cache.install_cache(cache_name=cache_dir, backend='sqlite', expire_after=86400) # Cache expires after 1 day
# Site root; relative links scraped from pages are prefixed with this.
base_url = "https://www.ft.dk"
# Committee codes in lower case. Each code is inserted into the
# member-overview URL and, upper-cased, becomes a column in the output.
udvalg = [
"reu", "beu", "buu", "epi", "eru", "euu", "fiu", "fou", "fæu",
"gru", "bou", "ifu", "kiu", "kef", "kuu", "liu", "mof", "sau",
"sou", "suu", "tru", "ufo", "uru", "uui", "ulø", "uvp"
]
# Step 1: Extract member links from the provided URL
def extract_members(udvalg_url):
    """Return the biography links listed on a committee's member overview page.

    Args:
        udvalg_url: URL of the member overview page to fetch.

    Returns:
        A list of dicts of the form {"biopage": <absolute URL>}, one per
        table cell with data-title "Navn" that contains a link.
    """
    response = requests.get(udvalg_url)
    logging.debug(f"Fetching {udvalg_url}, from cache: {response.from_cache}")
    soup = BeautifulSoup(response.text, 'html.parser')

    # Links already pointing at the member section of the site are kept
    # verbatim; everything else is treated as site-relative.
    member_prefix = base_url + '/medlemmer/mf/'
    name_cells = soup.find_all('td', {'data-title': 'Navn'})
    links = (cell.find('a', href=True) for cell in name_cells)
    hrefs = [link['href'] for link in links if link]
    return [
        {"biopage": href if href.startswith(member_prefix) else base_url + href}
        for href in hrefs
    ]
# Step 2: Extract the name and PDF URL for each member
def extract_pdf_url(member_url):
    """Fetch a member's biography page and return name, party and CV link.

    Args:
        member_url: URL of the member's biography page.

    Returns:
        A dict with the keys 'Navn' (name), 'Parti' (party) and 'CV'
        (absolute URL of the CV download, or None when no download
        button mentioning "CV" is found).

    Raises:
        ValueError: if the page title is not of the form 'Name (Party)'.
    """
    response = requests.get(member_url)
    logging.debug(f"Fetching {member_url}, from cache: {response.from_cache}")
    soup = BeautifulSoup(response.text, 'html.parser')

    # The page title holds both pieces: "Name (Party)"
    title = soup.find('h1', class_='biography-page-title').text.strip()
    parsed = re.match(r'^(.*)\s\((.*)\)$', title)
    if not parsed:
        raise ValueError("Text format does not match 'Name (Party)'")
    name, party = parsed.groups()

    # Take the first download button whose text mentions "CV"
    pdf_url = None
    for button in soup.select('a.download__container__docBtns__btn'):
        if "CV" in button.get_text():
            pdf_url = button['href']
            break
    if pdf_url and not pdf_url.startswith(base_url):
        pdf_url = base_url + pdf_url

    return {'Navn': name, 'Parti': party, 'CV': pdf_url}
# Step 3: Extract email from the PDF
def extract_email_from_pdf(member):
    """Download a member's CV PDF and return the first email address in it.

    The PDF is parsed from memory, so no temporary file is written and
    nothing has to be cleaned up if the download fails.

    Args:
        member: dict with the key "CV" (URL of the CV PDF, or a false
            value when there is none) and the key "Navn", which is used
            in error messages.

    Returns:
        The email address as a string, or None when the member has no
        CV, no address is found, or the download/parsing fails (the
        failure is logged).
    """
    import io  # local import: only needed here

    pdf_url = member["CV"]
    if not pdf_url:
        return None
    try:
        response = requests.get(pdf_url)
        logging.debug(f"Fetching {pdf_url}, from cache: {response.from_cache}")
        # PdfReader/.pages replace the PdfFileReader/numPages/getPage API,
        # which PyPDF2 3.x no longer supports.
        reader = PyPDF2.PdfReader(io.BytesIO(response.content))
        for page in reader.pages:
            # Replace \xad (soft hyphen) with - so hyphenated addresses match
            text = (page.extract_text() or '').replace('\xad', '-')
            email_match = re.search(r'E[^a-z]*mail:\s*([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})', text)
            if email_match:
                return email_match.group(1)
        return None
    except PyPDF2.errors.PdfReadError as e:
        logging.error(f"Failed to read PDF for member {member['Navn']} with URL {pdf_url}: {e}")
        return None
    except Exception as e:
        # Best effort: one unreadable CV must not abort the whole run
        logging.error(f"An error occurred while processing member {member['Navn']} with URL {pdf_url}: {e}")
        return None
# Fetch the member overview of every committee, keyed by committee code
udv_members = {}
for udv in udvalg:
    udv_members[udv] = extract_members(f"{base_url}/da/udvalg/udvalgene/{udv}/medlemsoversigt")

# Merge into one record per member (keyed by biography URL); each
# committee the member sits on becomes a column set to 1
members = {}
for udv, member_list in udv_members.items():
    for member in member_list:
        biopage = member["biopage"]
        record = members.setdefault(biopage, {"biopage": biopage})
        record[udv.upper()] = 1

# Enrich every unique member with name, party, CV link and email
for member in members.values():
    member.update(extract_pdf_url(member["biopage"]))
    if member["CV"]:
        member['Email'] = extract_email_from_pdf(member)
    else:
        member['Email'] = None

# Fixed leading columns followed by the committee codes in sorted order
committee_columns = [udv.upper() for udv in sorted(udvalg)]
columns_order = ['Navn', 'Parti', 'Email', 'biopage', 'CV'] + committee_columns

# Step 4: Save the extracted data to an ODS file
df = pd.DataFrame(list(members.values())).reindex(columns=columns_order)
df.to_excel('ft-udvalgsmedlemmer.ods', index=False)
print("Data has been successfully saved to ft-udvalgsmedlemmer.ods")

117
gitdiffdir/gitdiffdir Executable file
View file

@ -0,0 +1,117 @@
#!/bin/bash
: <<=cut
=pod
=head1 NAME
gitdiffdir - git diff, but between two dirs in different repositories
=head1 SYNOPSIS
B<gitdiffdir> [I<diff options>] I<dir1> I<dir2>
=head1 DESCRIPTION
B<git diff> only looks at tracked files. B<git diff> I<dir1 dir2>
includes all untracked files.
B<gitdiffdir> tries to simulate B<git diff> by only looking at tracked
files but on two dirs in different repositories.
=head1 OPTIONS
B<gitdiffdir> passes options to B<diff>. Default: B<-Naur>
=head1 EXAMPLES
Diff dirs with the same name:
gitdiffdir myproject/mydir1 myoldproject/mydir1
Diff dirs with different names:
gitdiffdir myproject/mydir1 myoldproject/mydir2
Ignore new files:
gitdiffdir -aur myproject/mydir1 myoldproject/mydir2
=head1 AUTHOR
Copyright (C) 2024 Ole Tange,
http://ole.tange.dk and Free Software Foundation, Inc.
=head1 LICENSE
Copyright (C) 2012 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
at your option any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
=head1 DEPENDENCIES
B<gitdiffdir> uses B<git>, B<parallel>, B<sort>, B<diff>, and B<less>.
=head1 SEE ALSO
B<git>
=cut
# Split the command line: anything starting with '-' is passed on to
# diff, everything else is a directory operand.
diff_options=()
others=()
# Loop through all arguments
while [[ $# -gt 0 ]]; do
    case $1 in
        -*) diff_options+=("$1") ;; # Add to options array if it starts with -
        *) others+=("$1") ;; # Add to others array if it doesn't start with -
    esac
    shift # Shift to the next argument
done
# Both directories are required: with an empty dir the file listing and
# the diff paths below would silently refer to the wrong places.
if [ ${#others[@]} -lt 2 ]; then
    echo "Usage: gitdiffdir [diff options] dir1 dir2" >&2
    exit 2
fi
if [ ${#diff_options[@]} -eq 0 ]; then
    # Default: diff -Naur
    diff_options+=("-Naur")
fi
dirA="${others[0]}"
dirB="${others[1]}"
# Remove all trailing slashes
while [[ "$dirA" == */ ]]; do
    dirA="${dirA%/}"
done
while [[ "$dirB" == */ ]]; do
    dirB="${dirB%/}"
done
less_or_cat() {
    # Filter for the end of a pipeline: page through less when stdout
    # is a terminal, otherwise pass stdin through unchanged with cat.
    [ -t 1 ] || { cat; return; }
    less
}
# 1. Run 'git ls-files' inside each of the two dirs to list their tracked files.
# 2. Merge both listings into one sorted list without duplicates.
# 3. For every listed path, diff the copy under dirA against the copy under dirB
#    using the collected diff options.
# 4. Page the result if stdout is a terminal.
parallel -0 'cd {} && git ls-files' ::: "$dirA" "$dirB" | sort -u |
parallel -q diff "${diff_options[@]}" "$dirA"/{} "$dirB"/{} | less_or_cat

View file

@ -27,7 +27,7 @@ Go to next revision
=head1 AUTHOR
Copyright (C) 2017 Ole Tange,
Copyright (C) 2017-2024 Ole Tange,
http://ole.tange.dk and Free Software Foundation, Inc.
@ -51,7 +51,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
=head1 DEPENDENCIES
B<gitnext> uses B<git>.
B<gitnext> uses B<git> and B<field>.
=head1 SEE ALSO

46
vid/vid
View file

@ -23,17 +23,49 @@ There can be multiple B<grep> expressions.
The searching is cached in B<.vidlist> in a parent dir or in the
current dir if no parents contain B<.vidlist>.
If stdin (standard input) is a tty: Match videos from B<.vidlist>
ordered by decreasing size.
If stdin (standard input) is not a tty: Match videos from standard
input.
If stdout (standard output) is a tty: Play 100 randomly chosen
matching videos.
If stdout (standard output) is not a tty: List all matching videos.
Videos in the dir B<.waste> are ignored.
=head1 EXAMPLE
Play videos matching B<Documentary> but not B<BBC>:
Play 100 videos in random order matching B<Documentary> but not B<BBC>:
vid Documentary -v BBC
List all videos matching B<Documentary> but not B<BBC> ordered by
decreasing size:
vid Documentary -v BBC | cat
Play all videos ending in B<.mp4>:
ls *.mp4 | vid
=head1 ENVIRONMENT VARIABLES
=over 9
=item $VIDEOPLAYER
Use this as video player. Default: vlc
=back
=head1 AUTHOR
Copyright (C) 2018-2019 Ole Tange,
Copyright (C) 2018-2024 Ole Tange,
http://ole.tange.dk and Free Software Foundation, Inc.
@ -118,6 +150,7 @@ cat_list() {
full_path_vidlist_dir="$(dirname $(readlink -f "$vidlist") )"
full_path_thisdir="$(readlink -f .)"
if [ -f "$vidlist" ] ; then
# vidlist exists: Do the update in the background
# find background (>/dev/null to detach from tty)
update_list "$vidlist" "$full_path_vidlist_dir" >/dev/null &
else
@ -128,7 +161,7 @@ cat_list() {
perl -pe 's|\Q'"$full_path_vidlist_dir"'\E|.|')/"
# cat "$vidlist" | grep matching this dir + remove dirs
# echo "$vidlist" "$full_path_thisdir" "$full_path_vidlist_dir" = "$subdir" >&2
cat "$vidlist" |
grep -v '/.waste/' "$vidlist" |
perl -ne 's|^(\./)?\Q'"$subdir"'\E|| and print'
}
@ -147,7 +180,12 @@ stdout() {
# STDOUT = terminal
# start $VIDEOPLAYER
VIDEOPLAYER=${VIDEOPLAYER:-vlc --}
shuf | parallel --halt now,done=1 --lb -n100 -Xj1 $VIDEOPLAYER
if tty -s ; then
# STDIN is terminal => limit to 100
shuf | parallel --halt now,done=1 --lb -n100 -Xj1 $VIDEOPLAYER
else
parallel --halt now,done=1 --lb -Xj1 $VIDEOPLAYER
fi
else
cat
fi